/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "sysemu/qtest.h"

#define VIRTIO_NET_VM_VERSION 11

#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval. This value affects performance
   a lot and should be tuned carefully: '300000' (300us) is the recommended
   value to pass the WHQL test; '50000' can gain 2x netperf throughput with
   tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

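/*
 * Note: each feature below extends the guest-visible config space up to
 * the end of the field it gates; this table is consumed through
 * cfg_size_params so the config size tracks the negotiated features.
 */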
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

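/*
 * RX and TX virtqueues are interleaved (rx0, tx0, rx1, tx1, ...), so a
 * queue-pair index is simply the virtqueue index divided by two.
 */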
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            /*
             * Some NIC/kernel combinations present 0 as the mac address. As
             * that is not a legal address, try to proceed with the
             * address from the QEMU command line in the hope that the
             * address has been configured correctly elsewhere - just not
             * reported by the device.
             */
            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
                info_report("Zero hardware mac address detected. Ignoring.");
                memcpy(netcfg.mac, n->mac, ETH_ALEN);
            }
            memcpy(config, &netcfg, n->config_size);
        }
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

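/*
 * Returns true if a peer could not be switched to the requested vnet
 * header endianness while enabling, in which case the caller must swap
 * the headers in software.
 */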
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and notification disabled. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all possible virtio-net supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25. It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN) |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @opaque: FailoverId to setup
 * @opts: opts for device we are handling
 * @errp: returns an error if this function fails
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 * @errp: returns an error if this function fails
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

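/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks:
 * unicast entries first, then multicast. Both land in one table, with
 * first_multi marking the boundary; if either half overflows, the
 * corresponding overflow flag is set and that class is accepted
 * unfiltered instead.
 */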
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

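/*
 * The VLAN filter is a MAX_VLAN-bit bitmap stored as 32-bit words:
 * word index vid >> 5, bit vid & 0x1f.
 */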
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_detach_epbf_rss(VirtIONet *n);

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_epbf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering eBPF */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}

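/*
 * Parse a VIRTIO_NET_CTRL_MQ_RSS_CONFIG (do_rss) or
 * VIRTIO_NET_CTRL_MQ_HASH_CONFIG command. The payload follows struct
 * virtio_net_rss_config from the virtio spec: hash_types and the
 * indirection table first, then max_tx_vq (u16) and hash_key_length
 * (u8) -- read below through the packed 'temp' struct -- and finally
 * the hash key. Returns the number of queue pairs, or 0 on error.
 */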
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
1350 err_msg = "Can get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;

    if (!n->rss_data.populate_hash) {
        if (!virtio_net_attach_epbf_rss(n)) {
            /* EBPF must be loaded for vhost */
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
                goto error;
            }
            /* fall back to software RSS */
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    } else {
        /* Use software RSS for hash populating, and detach eBPF if it
         * was loaded before. */
        virtio_net_detach_epbf_rss(n);
        n->rss_data.enabled_software_rss = true;
    }

    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums. This
 * causes dhclient to get upset. Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums. This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

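/*
 * Decide whether to accept a packet: promiscuous mode wins, then the
 * VLAN filter applies, then the multicast/unicast mode flags and the
 * MAC table. Returns 1 to accept, 0 to drop.
 */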
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static uint8_t virtio_net_get_hash_type(bool isip4,
                                        bool isip6,
                                        bool isudp,
                                        bool istcp,
                                        uint32_t types)
{
    if (isip4) {
        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
            return NetPktRssIpV4Tcp;
        }
        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
            return NetPktRssIpV4Udp;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (isip6) {
        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;

        if (istcp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
        if (isudp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
        if (types & mask) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
                NetPktRssIpV6Ex : NetPktRssIpV6;
        }
    }
    return 0xff;
}

static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}

static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}

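/*
 * Copy a packet into the guest's receive buffers. With mergeable RX
 * buffers the packet may span several descriptor chains; the number of
 * chains used is patched back into num_buffers in the first header
 * once the copy is complete.
 */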
1767 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1768 size_t size, bool no_rss)
1769 {
1770 VirtIONet *n = qemu_get_nic_opaque(nc);
1771 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1772 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1773 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1774 size_t lens[VIRTQUEUE_MAX_SIZE];
1775 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1776 struct virtio_net_hdr_mrg_rxbuf mhdr;
1777 unsigned mhdr_cnt = 0;
1778 size_t offset, i, guest_offset, j;
1779 ssize_t err;
1780
1781 if (!virtio_net_can_receive(nc)) {
1782 return -1;
1783 }
1784
1785 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1786 int index = virtio_net_process_rss(nc, buf, size);
1787 if (index >= 0) {
1788 NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1789 return virtio_net_receive_rcu(nc2, buf, size, true);
1790 }
1791 }
1792
1793 /* hdr_len refers to the header we supply to the guest */
1794 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1795 return 0;
1796 }
1797
1798 if (!receive_filter(n, buf, size))
1799 return size;
1800
1801 offset = i = 0;
1802
1803 while (offset < size) {
1804 VirtQueueElement *elem;
1805 int len, total;
1806 const struct iovec *sg;
1807
1808 total = 0;
1809
1810 if (i == VIRTQUEUE_MAX_SIZE) {
1811 virtio_error(vdev, "virtio-net unexpected long buffer chain");
1812 err = size;
1813 goto err;
1814 }
1815
1816 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1817 if (!elem) {
1818 if (i) {
1819 virtio_error(vdev, "virtio-net unexpected empty queue: "
1820 "i %zd mergeable %d offset %zd, size %zd, "
1821 "guest hdr len %zd, host hdr len %zd "
1822 "guest features 0x%" PRIx64,
1823 i, n->mergeable_rx_bufs, offset, size,
1824 n->guest_hdr_len, n->host_hdr_len,
1825 vdev->guest_features);
1826 }
1827 err = -1;
1828 goto err;
1829 }
1830
1831 if (elem->in_num < 1) {
1832 virtio_error(vdev,
1833 "virtio-net receive queue contains no in buffers");
1834 virtqueue_detach_element(q->rx_vq, elem, 0);
1835 g_free(elem);
1836 err = -1;
1837 goto err;
1838 }
1839
1840 sg = elem->in_sg;
1841 if (i == 0) {
1842 assert(offset == 0);
1843 if (n->mergeable_rx_bufs) {
1844 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1845 sg, elem->in_num,
1846 offsetof(typeof(mhdr), num_buffers),
1847 sizeof(mhdr.num_buffers));
1848 }
1849
1850 receive_header(n, sg, elem->in_num, buf, size);
1851 if (n->rss_data.populate_hash) {
1852 offset = sizeof(mhdr);
1853 iov_from_buf(sg, elem->in_num, offset,
1854 buf + offset, n->host_hdr_len - sizeof(mhdr));
1855 }
1856 offset = n->host_hdr_len;
1857 total += n->guest_hdr_len;
1858 guest_offset = n->guest_hdr_len;
1859 } else {
1860 guest_offset = 0;
1861 }
1862
1863 /* copy in packet. ugh */
1864 len = iov_from_buf(sg, elem->in_num, guest_offset,
1865 buf + offset, size - offset);
1866 total += len;
1867 offset += len;
1868 /* If buffers can't be merged, at this point we
1869 * must have consumed the complete packet.
1870 * Otherwise, drop it. */
1871 if (!n->mergeable_rx_bufs && offset < size) {
1872 virtqueue_unpop(q->rx_vq, elem, total);
1873 g_free(elem);
1874 err = size;
1875 goto err;
1876 }
1877
1878 elems[i] = elem;
1879 lens[i] = total;
1880 i++;
1881 }
1882
1883 if (mhdr_cnt) {
1884 virtio_stw_p(vdev, &mhdr.num_buffers, i);
1885 iov_from_buf(mhdr_sg, mhdr_cnt,
1886 0,
1887 &mhdr.num_buffers, sizeof mhdr.num_buffers);
1888 }
1889
1890 for (j = 0; j < i; j++) {
1891 /* signal other side */
1892 virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1893 g_free(elems[j]);
1894 }
1895
1896 virtqueue_flush(q->rx_vq, i);
1897 virtio_notify(vdev, q->rx_vq);
1898
1899 return size;
1900
1901 err:
1902 for (j = 0; j < i; j++) {
1903 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
1904 g_free(elems[j]);
1905 }
1906
1907 return err;
1908 }
1909
1910 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1911 size_t size)
1912 {
1913 RCU_READ_LOCK_GUARD();
1914
1915 return virtio_net_receive_rcu(nc, buf, size, false);
1916 }
1917
1918 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1919 const uint8_t *buf,
1920 VirtioNetRscUnit *unit)
1921 {
1922 uint16_t ip_hdrlen;
1923 struct ip_header *ip;
1924
1925 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1926 + sizeof(struct eth_header));
1927 unit->ip = (void *)ip;
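/* The low nibble of ip_ver_len is the IHL in 32-bit words; "<< 2"
converts it to a byte count. */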
1928 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1929 unit->ip_plen = &ip->ip_len;
1930 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
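/* th_offset_flags carries the TCP data offset in its top 4 bits, in
32-bit words: (x >> 12) * 4 is the same as x >> 10. */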
1931 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1932 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1933 }
1934
1935 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1936 const uint8_t *buf,
1937 VirtioNetRscUnit *unit)
1938 {
1939 struct ip6_header *ip6;
1940
1941 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1942 + sizeof(struct eth_header));
1943 unit->ip = ip6;
1944 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1945 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1946 + sizeof(struct ip6_header));
1947 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1948
1949 /* There is a difference between the payload length in IPv4 and v6:
1950 the IP header itself is excluded from the IPv6 payload length */
1951 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1952 }
1953
1954 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1955 VirtioNetRscSeg *seg)
1956 {
1957 int ret;
1958 struct virtio_net_hdr_v1 *h;
1959
1960 h = (struct virtio_net_hdr_v1 *)seg->buf;
1961 h->flags = 0;
1962 h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1963
1964 if (seg->is_coalesced) {
1965 h->rsc.segments = seg->packets;
1966 h->rsc.dup_acks = seg->dup_ack;
1967 h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1968 if (chain->proto == ETH_P_IP) {
1969 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1970 } else {
1971 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1972 }
1973 }
1974
1975 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1976 QTAILQ_REMOVE(&chain->buffers, seg, next);
1977 g_free(seg->buf);
1978 g_free(seg);
1979
1980 return ret;
1981 }
1982
1983 static void virtio_net_rsc_purge(void *opq)
1984 {
1985 VirtioNetRscSeg *seg, *rn;
1986 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1987
1988 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1989 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1990 chain->stat.purge_failed++;
1991 continue;
1992 }
1993 }
1994
1995 chain->stat.timer++;
1996 if (!QTAILQ_EMPTY(&chain->buffers)) {
1997 timer_mod(chain->drain_timer,
1998 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1999 }
2000 }
2001
2002 static void virtio_net_rsc_cleanup(VirtIONet *n)
2003 {
2004 VirtioNetRscChain *chain, *rn_chain;
2005 VirtioNetRscSeg *seg, *rn_seg;
2006
2007 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2008 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2009 QTAILQ_REMOVE(&chain->buffers, seg, next);
2010 g_free(seg->buf);
2011 g_free(seg);
2012 }
2013
2014 timer_free(chain->drain_timer);
2015 QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2016 g_free(chain);
2017 }
2018 }
2019
2020 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2021 NetClientState *nc,
2022 const uint8_t *buf, size_t size)
2023 {
2024 uint16_t hdr_len;
2025 VirtioNetRscSeg *seg;
2026
2027 hdr_len = chain->n->guest_hdr_len;
2028 seg = g_new(VirtioNetRscSeg, 1);
2029 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2030 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2031 memcpy(seg->buf, buf, size);
2032 seg->size = size;
2033 seg->packets = 1;
2034 seg->dup_ack = 0;
2035 seg->is_coalesced = 0;
2036 seg->nc = nc;
2037
2038 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2039 chain->stat.cache++;
2040
2041 switch (chain->proto) {
2042 case ETH_P_IP:
2043 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2044 break;
2045 case ETH_P_IPV6:
2046 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2047 break;
2048 default:
2049 g_assert_not_reached();
2050 }
2051 }
2052
2053 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2054 VirtioNetRscSeg *seg,
2055 const uint8_t *buf,
2056 struct tcp_header *n_tcp,
2057 struct tcp_header *o_tcp)
2058 {
2059 uint32_t nack, oack;
2060 uint16_t nwin, owin;
2061
2062 nack = htonl(n_tcp->th_ack);
2063 nwin = htons(n_tcp->th_win);
2064 oack = htonl(o_tcp->th_ack);
2065 owin = htons(o_tcp->th_win);
2066
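/* The subtraction below is unsigned and wraps, so one compare rejects
acks that are too far ahead of or behind the cached ack. */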
2067 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2068 chain->stat.ack_out_of_win++;
2069 return RSC_FINAL;
2070 } else if (nack == oack) {
2071 /* duplicate ack or window probe */
2072 if (nwin == owin) {
2073 /* duplicate ack: bump the dup-ack count (the WHQL test allows up to 1) */
2074 chain->stat.dup_ack++;
2075 return RSC_FINAL;
2076 } else {
2077 /* Coalesce window update */
2078 o_tcp->th_win = n_tcp->th_win;
2079 chain->stat.win_update++;
2080 return RSC_COALESCE;
2081 }
2082 } else {
2083 /* pure ack, go to 'C' (finalize the segment) */
2084 chain->stat.pure_ack++;
2085 return RSC_FINAL;
2086 }
2087 }
2088
2089 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2090 VirtioNetRscSeg *seg,
2091 const uint8_t *buf,
2092 VirtioNetRscUnit *n_unit)
2093 {
2094 void *data;
2095 uint16_t o_ip_len;
2096 uint32_t nseq, oseq;
2097 VirtioNetRscUnit *o_unit;
2098
2099 o_unit = &seg->unit;
2100 o_ip_len = htons(*o_unit->ip_plen);
2101 nseq = htonl(n_unit->tcp->th_seq);
2102 oseq = htonl(o_unit->tcp->th_seq);
2103
2104 /* out of order or retransmitted. */
2105 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2106 chain->stat.data_out_of_win++;
2107 return RSC_FINAL;
2108 }
2109
2110 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2111 if (nseq == oseq) {
2112 if ((o_unit->payload == 0) && n_unit->payload) {
2113 /* From no payload to payload: the normal case, not a dup ack etc. */
2114 chain->stat.data_after_pure_ack++;
2115 goto coalesce;
2116 } else {
2117 return virtio_net_rsc_handle_ack(chain, seg, buf,
2118 n_unit->tcp, o_unit->tcp);
2119 }
2120 } else if ((nseq - oseq) != o_unit->payload) {
2121 /* Doesn't start right after the cached payload: out of order */
2122 chain->stat.data_out_of_order++;
2123 return RSC_FINAL;
2124 } else {
2125 coalesce:
2126 if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2127 chain->stat.over_size++;
2128 return RSC_FINAL;
2129 }
2130
2131 /* The data is in sequence; the payload length field differs between
2132 v4 and v6, so update it through the recorded field pointer */
2133 o_unit->payload += n_unit->payload; /* update new data len */
2134
2135 /* update field in ip header */
2136 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2137
2138 /* Carry over the latest 'PSH' flag: the WHQL test guide says 'PSH'
2139 can be coalesced for Windows guests, though this may change the
2140 behavior for a Linux guest (only if it uses the RSC feature). */
2141 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2142
2143 o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2144 o_unit->tcp->th_win = n_unit->tcp->th_win;
2145
2146 memmove(seg->buf + seg->size, data, n_unit->payload);
2147 seg->size += n_unit->payload;
2148 seg->packets++;
2149 chain->stat.coalesced++;
2150 return RSC_COALESCE;
2151 }
2152 }
2153
2154 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2155 VirtioNetRscSeg *seg,
2156 const uint8_t *buf, size_t size,
2157 VirtioNetRscUnit *unit)
2158 {
2159 struct ip_header *ip1, *ip2;
2160
2161 ip1 = (struct ip_header *)(unit->ip);
2162 ip2 = (struct ip_header *)(seg->unit.ip);
2163 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2164 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2165 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2166 chain->stat.no_match++;
2167 return RSC_NO_MATCH;
2168 }
2169
2170 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2171 }
2172
2173 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2174 VirtioNetRscSeg *seg,
2175 const uint8_t *buf, size_t size,
2176 VirtioNetRscUnit *unit)
2177 {
2178 struct ip6_header *ip1, *ip2;
2179
2180 ip1 = (struct ip6_header *)(unit->ip);
2181 ip2 = (struct ip6_header *)(seg->unit.ip);
2182 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2183 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2184 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2185 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2186 chain->stat.no_match++;
2187 return RSC_NO_MATCH;
2188 }
2189
2190 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2191 }
2192
2193 /* Packets with 'SYN' bypass coalescing; packets with other control
2194 * flags are only sent after a drain, to prevent reordering */
2195 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2196 struct tcp_header *tcp)
2197 {
2198 uint16_t tcp_hdr;
2199 uint16_t tcp_flag;
2200
2201 tcp_flag = htons(tcp->th_offset_flags);
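/* Same trick as in the unit extractors: the masked data offset is in
32-bit words, and ">> 10" yields the header length in bytes. */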
2202 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2203 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2204 if (tcp_flag & TH_SYN) {
2205 chain->stat.tcp_syn++;
2206 return RSC_BYPASS;
2207 }
2208
2209 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2210 chain->stat.tcp_ctrl_drain++;
2211 return RSC_FINAL;
2212 }
2213
2214 if (tcp_hdr > sizeof(struct tcp_header)) {
2215 chain->stat.tcp_all_opt++;
2216 return RSC_FINAL;
2217 }
2218
2219 return RSC_CANDIDATE;
2220 }
2221
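/* Try to coalesce an incoming segment with the ones cached on this
chain. An empty cache just stores the packet and (re)arms the drain
timer; otherwise each cached segment is matched against the new one
and the result decides whether to merge, flush, or cache it. */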
2222 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2223 NetClientState *nc,
2224 const uint8_t *buf, size_t size,
2225 VirtioNetRscUnit *unit)
2226 {
2227 int ret;
2228 VirtioNetRscSeg *seg, *nseg;
2229
2230 if (QTAILQ_EMPTY(&chain->buffers)) {
2231 chain->stat.empty_cache++;
2232 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2233 timer_mod(chain->drain_timer,
2234 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2235 return size;
2236 }
2237
2238 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2239 if (chain->proto == ETH_P_IP) {
2240 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2241 } else {
2242 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2243 }
2244
2245 if (ret == RSC_FINAL) {
2246 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2247 /* Send failed */
2248 chain->stat.final_failed++;
2249 return 0;
2250 }
2251
2252 /* Send current packet */
2253 return virtio_net_do_receive(nc, buf, size);
2254 } else if (ret == RSC_NO_MATCH) {
2255 continue;
2256 } else {
2257 /* Coalesced; set the flag so the IPv4 checksum gets recalculated */
2258 seg->is_coalesced = 1;
2259 return size;
2260 }
2261 }
2262
2263 chain->stat.no_match_cache++;
2264 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2265 return size;
2266 }
2267
2268 /* Drain a connection's cached data, to avoid out-of-order segments */
2269 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2270 NetClientState *nc,
2271 const uint8_t *buf, size_t size,
2272 uint16_t ip_start, uint16_t ip_size,
2273 uint16_t tcp_port)
2274 {
2275 VirtioNetRscSeg *seg, *nseg;
2276 uint32_t ppair1, ppair2;
2277
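/* Compare both 16-bit TCP ports at once with a single raw 32-bit
load from the port-pair offset. */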
2278 ppair1 = *(uint32_t *)(buf + tcp_port);
2279 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2280 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2281 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2282 || (ppair1 != ppair2)) {
2283 continue;
2284 }
2285 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2286 chain->stat.drain_failed++;
2287 }
2288
2289 break;
2290 }
2291
2292 return virtio_net_do_receive(nc, buf, size);
2293 }
2294
2295 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2296 struct ip_header *ip,
2297 const uint8_t *buf, size_t size)
2298 {
2299 uint16_t ip_len;
2300
2301 /* Not an ipv4 packet */
2302 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2303 chain->stat.ip_option++;
2304 return RSC_BYPASS;
2305 }
2306
2307 /* Don't handle packets with ip option */
2308 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2309 chain->stat.ip_option++;
2310 return RSC_BYPASS;
2311 }
2312
2313 if (ip->ip_p != IPPROTO_TCP) {
2314 chain->stat.bypass_not_tcp++;
2315 return RSC_BYPASS;
2316 }
2317
2318 /* Don't handle packets with ip fragment */
2319 if (!(htons(ip->ip_off) & IP_DF)) {
2320 chain->stat.ip_frag++;
2321 return RSC_BYPASS;
2322 }
2323
2324 /* Don't handle packets with ecn flag */
2325 if (IPTOS_ECN(ip->ip_tos)) {
2326 chain->stat.ip_ecn++;
2327 return RSC_BYPASS;
2328 }
2329
2330 ip_len = htons(ip->ip_len);
2331 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2332 || ip_len > (size - chain->n->guest_hdr_len -
2333 sizeof(struct eth_header))) {
2334 chain->stat.ip_hacked++;
2335 return RSC_BYPASS;
2336 }
2337
2338 return RSC_CANDIDATE;
2339 }
2340
2341 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2342 NetClientState *nc,
2343 const uint8_t *buf, size_t size)
2344 {
2345 int32_t ret;
2346 uint16_t hdr_len;
2347 VirtioNetRscUnit unit;
2348
2349 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2350
2351 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2352 + sizeof(struct tcp_header))) {
2353 chain->stat.bypass_not_tcp++;
2354 return virtio_net_do_receive(nc, buf, size);
2355 }
2356
2357 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2358 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2359 != RSC_CANDIDATE) {
2360 return virtio_net_do_receive(nc, buf, size);
2361 }
2362
2363 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2364 if (ret == RSC_BYPASS) {
2365 return virtio_net_do_receive(nc, buf, size);
2366 } else if (ret == RSC_FINAL) {
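/* The "+ 12" skips the fixed IPv4 header bytes before ip_src;
comparing 8 bytes covers saddr + daddr, and the last argument is
the offset of the TCP port pair. */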
2367 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2368 ((hdr_len + sizeof(struct eth_header)) + 12),
2369 VIRTIO_NET_IP4_ADDR_SIZE,
2370 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2371 }
2372
2373 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2374 }
2375
2376 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2377 struct ip6_header *ip6,
2378 const uint8_t *buf, size_t size)
2379 {
2380 uint16_t ip_len;
2381
2382 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2383 != IP_HEADER_VERSION_6) {
2384 return RSC_BYPASS;
2385 }
2386
2387 /* This checks both extension headers (options) and the protocol */
2388 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2389 chain->stat.bypass_not_tcp++;
2390 return RSC_BYPASS;
2391 }
2392
2393 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2394 if (ip_len < sizeof(struct tcp_header) ||
2395 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2396 - sizeof(struct ip6_header))) {
2397 chain->stat.ip_hacked++;
2398 return RSC_BYPASS;
2399 }
2400
2401 /* Don't handle packets with ecn flag */
2402 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2403 chain->stat.ip_ecn++;
2404 return RSC_BYPASS;
2405 }
2406
2407 return RSC_CANDIDATE;
2408 }
2409
2410 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2411 const uint8_t *buf, size_t size)
2412 {
2413 int32_t ret;
2414 uint16_t hdr_len;
2415 VirtioNetRscChain *chain;
2416 VirtioNetRscUnit unit;
2417
2418 chain = (VirtioNetRscChain *)opq;
2419 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2420
2421 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2422 + sizeof(struct tcp_header))) {
2423 return virtio_net_do_receive(nc, buf, size);
2424 }
2425
2426 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2427 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2428 unit.ip, buf, size)) {
2429 return virtio_net_do_receive(nc, buf, size);
2430 }
2431
2432 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2433 if (ret == RSC_BYPASS) {
2434 return virtio_net_do_receive(nc, buf, size);
2435 } else if (ret == RSC_FINAL) {
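/* The "+ 8" skips the fixed IPv6 header bytes before ip6_src;
comparing 32 bytes covers saddr + daddr, and the last argument is
the offset of the TCP port pair. */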
2436 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2437 ((hdr_len + sizeof(struct eth_header)) + 8),
2438 VIRTIO_NET_IP6_ADDR_SIZE,
2439 hdr_len + sizeof(struct eth_header)
2440 + sizeof(struct ip6_header));
2441 }
2442
2443 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2444 }
2445
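/* Find the per-protocol coalescing chain, creating it lazily on first
use; only ETH_P_IP and ETH_P_IPV6 get a chain. */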
2446 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2447 NetClientState *nc,
2448 uint16_t proto)
2449 {
2450 VirtioNetRscChain *chain;
2451
2452 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2453 return NULL;
2454 }
2455
2456 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2457 if (chain->proto == proto) {
2458 return chain;
2459 }
2460 }
2461
2462 chain = g_malloc(sizeof(*chain));
2463 chain->n = n;
2464 chain->proto = proto;
2465 if (proto == (uint16_t)ETH_P_IP) {
2466 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2467 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2468 } else {
2469 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2470 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2471 }
2472 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2473 virtio_net_rsc_purge, chain);
2474 memset(&chain->stat, 0, sizeof(chain->stat));
2475
2476 QTAILQ_INIT(&chain->buffers);
2477 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2478
2479 return chain;
2480 }
2481
2482 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2483 const uint8_t *buf,
2484 size_t size)
2485 {
2486 uint16_t proto;
2487 VirtioNetRscChain *chain;
2488 struct eth_header *eth;
2489 VirtIONet *n;
2490
2491 n = qemu_get_nic_opaque(nc);
2492 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2493 return virtio_net_do_receive(nc, buf, size);
2494 }
2495
2496 eth = (struct eth_header *)(buf + n->guest_hdr_len);
2497 proto = htons(eth->h_proto);
2498
2499 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2500 if (chain) {
2501 chain->stat.received++;
2502 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2503 return virtio_net_rsc_receive4(chain, nc, buf, size);
2504 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2505 return virtio_net_rsc_receive6(chain, nc, buf, size);
2506 }
2507 }
2508 return virtio_net_do_receive(nc, buf, size);
2509 }
2510
2511 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2512 size_t size)
2513 {
2514 VirtIONet *n = qemu_get_nic_opaque(nc);
2515 if ((n->rsc4_enabled || n->rsc6_enabled)) {
2516 return virtio_net_rsc_receive(nc, buf, size);
2517 } else {
2518 return virtio_net_do_receive(nc, buf, size);
2519 }
2520 }
2521
2522 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2523
2524 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2525 {
2526 VirtIONet *n = qemu_get_nic_opaque(nc);
2527 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2528 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2529
2530 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2531 virtio_notify(vdev, q->tx_vq);
2532
2533 g_free(q->async_tx.elem);
2534 q->async_tx.elem = NULL;
2535
2536 virtio_queue_set_notification(q->tx_vq, 1);
2537 virtio_net_flush_tx(q);
2538 }
2539
2540 /* TX */
2541 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2542 {
2543 VirtIONet *n = q->n;
2544 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2545 VirtQueueElement *elem;
2546 int32_t num_packets = 0;
2547 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2548 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2549 return num_packets;
2550 }
2551
2552 if (q->async_tx.elem) {
2553 virtio_queue_set_notification(q->tx_vq, 0);
2554 return num_packets;
2555 }
2556
2557 for (;;) {
2558 ssize_t ret;
2559 unsigned int out_num;
2560 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2561 struct virtio_net_hdr_mrg_rxbuf mhdr;
2562
2563 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2564 if (!elem) {
2565 break;
2566 }
2567
2568 out_num = elem->out_num;
2569 out_sg = elem->out_sg;
2570 if (out_num < 1) {
2571 virtio_error(vdev, "virtio-net header not in first element");
2572 virtqueue_detach_element(q->tx_vq, elem, 0);
2573 g_free(elem);
2574 return -EINVAL;
2575 }
2576
2577 if (n->has_vnet_hdr) {
2578 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2579 n->guest_hdr_len) {
2580 virtio_error(vdev, "virtio-net header incorrect");
2581 virtqueue_detach_element(q->tx_vq, elem, 0);
2582 g_free(elem);
2583 return -EINVAL;
2584 }
2585 if (n->needs_vnet_hdr_swap) {
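/* Device and driver disagree on header endianness: send a
byte-swapped copy of the header from sg2[0] and chain the
remaining guest buffers after it. */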
2586 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2587 sg2[0].iov_base = &mhdr;
2588 sg2[0].iov_len = n->guest_hdr_len;
2589 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2590 out_sg, out_num,
2591 n->guest_hdr_len, -1);
2592 if (out_num == VIRTQUEUE_MAX_SIZE) {
2593 goto drop;
2594 }
2595 out_num += 1;
2596 out_sg = sg2;
2597 }
2598 }
2599 /*
2600 * If the host wants to see the guest header as is, we can
2601 * pass it on unchanged. Otherwise, copy just the parts
2602 * that the host is interested in.
2603 */
2604 assert(n->host_hdr_len <= n->guest_hdr_len);
2605 if (n->host_hdr_len != n->guest_hdr_len) {
2606 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2607 out_sg, out_num,
2608 0, n->host_hdr_len);
2609 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2610 out_sg, out_num,
2611 n->guest_hdr_len, -1);
2612 out_num = sg_num;
2613 out_sg = sg;
2614 }
2615
2616 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2617 out_sg, out_num, virtio_net_tx_complete);
2618 if (ret == 0) {
2619 virtio_queue_set_notification(q->tx_vq, 0);
2620 q->async_tx.elem = elem;
2621 return -EBUSY;
2622 }
2623
2624 drop:
2625 virtqueue_push(q->tx_vq, elem, 0);
2626 virtio_notify(vdev, q->tx_vq);
2627 g_free(elem);
2628
2629 if (++num_packets >= n->tx_burst) {
2630 break;
2631 }
2632 }
2633 return num_packets;
2634 }
2635
2636 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2637 {
2638 VirtIONet *n = VIRTIO_NET(vdev);
2639 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2640
2641 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2642 virtio_net_drop_tx_queue_data(vdev, vq);
2643 return;
2644 }
2645
2646 /* This happens when device was stopped but VCPU wasn't. */
2647 if (!vdev->vm_running) {
2648 q->tx_waiting = 1;
2649 return;
2650 }
2651
2652 if (q->tx_waiting) {
2653 virtio_queue_set_notification(vq, 1);
2654 timer_del(q->tx_timer);
2655 q->tx_waiting = 0;
2656 if (virtio_net_flush_tx(q) == -EINVAL) {
2657 return;
2658 }
2659 } else {
2660 timer_mod(q->tx_timer,
2661 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2662 q->tx_waiting = 1;
2663 virtio_queue_set_notification(vq, 0);
2664 }
2665 }
2666
2667 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2668 {
2669 VirtIONet *n = VIRTIO_NET(vdev);
2670 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2671
2672 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2673 virtio_net_drop_tx_queue_data(vdev, vq);
2674 return;
2675 }
2676
2677 if (unlikely(q->tx_waiting)) {
2678 return;
2679 }
2680 q->tx_waiting = 1;
2681 /* This happens when device was stopped but VCPU wasn't. */
2682 if (!vdev->vm_running) {
2683 return;
2684 }
2685 virtio_queue_set_notification(vq, 0);
2686 qemu_bh_schedule(q->tx_bh);
2687 }
2688
2689 static void virtio_net_tx_timer(void *opaque)
2690 {
2691 VirtIONetQueue *q = opaque;
2692 VirtIONet *n = q->n;
2693 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2694 /* This happens when device was stopped but BH wasn't. */
2695 if (!vdev->vm_running) {
2696 /* Make sure tx waiting is set, so we'll run when restarted. */
2697 assert(q->tx_waiting);
2698 return;
2699 }
2700
2701 q->tx_waiting = 0;
2702
2703 /* Just in case the driver is not ready any more */
2704 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2705 return;
2706 }
2707
2708 virtio_queue_set_notification(q->tx_vq, 1);
2709 virtio_net_flush_tx(q);
2710 }
2711
2712 static void virtio_net_tx_bh(void *opaque)
2713 {
2714 VirtIONetQueue *q = opaque;
2715 VirtIONet *n = q->n;
2716 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2717 int32_t ret;
2718
2719 /* This happens when device was stopped but BH wasn't. */
2720 if (!vdev->vm_running) {
2721 /* Make sure tx waiting is set, so we'll run when restarted. */
2722 assert(q->tx_waiting);
2723 return;
2724 }
2725
2726 q->tx_waiting = 0;
2727
2728 /* Just in case the driver is not ready any more */
2729 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2730 return;
2731 }
2732
2733 ret = virtio_net_flush_tx(q);
2734 if (ret == -EBUSY || ret == -EINVAL) {
2735 return; /* Notification re-enable handled by tx_complete or device
2736 * broken */
2737 }
2738
2739 /* If we flush a full burst of packets, assume there are
2740 * more coming and immediately reschedule */
2741 if (ret >= n->tx_burst) {
2742 qemu_bh_schedule(q->tx_bh);
2743 q->tx_waiting = 1;
2744 return;
2745 }
2746
2747 /* If less than a full burst, re-enable notification and flush
2748 * anything that may have come in while we weren't looking. If
2749 * we find something, assume the guest is still active and reschedule */
2750 virtio_queue_set_notification(q->tx_vq, 1);
2751 ret = virtio_net_flush_tx(q);
2752 if (ret == -EINVAL) {
2753 return;
2754 } else if (ret > 0) {
2755 virtio_queue_set_notification(q->tx_vq, 0);
2756 qemu_bh_schedule(q->tx_bh);
2757 q->tx_waiting = 1;
2758 }
2759 }
2760
2761 static void virtio_net_add_queue(VirtIONet *n, int index)
2762 {
2763 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2764
2765 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2766 virtio_net_handle_rx);
2767
2768 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2769 n->vqs[index].tx_vq =
2770 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2771 virtio_net_handle_tx_timer);
2772 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2773 virtio_net_tx_timer,
2774 &n->vqs[index]);
2775 } else {
2776 n->vqs[index].tx_vq =
2777 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2778 virtio_net_handle_tx_bh);
2779 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2780 }
2781
2782 n->vqs[index].tx_waiting = 0;
2783 n->vqs[index].n = n;
2784 }
2785
2786 static void virtio_net_del_queue(VirtIONet *n, int index)
2787 {
2788 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2789 VirtIONetQueue *q = &n->vqs[index];
2790 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2791
2792 qemu_purge_queued_packets(nc);
2793
2794 virtio_del_queue(vdev, index * 2);
2795 if (q->tx_timer) {
2796 timer_free(q->tx_timer);
2797 q->tx_timer = NULL;
2798 } else {
2799 qemu_bh_delete(q->tx_bh);
2800 q->tx_bh = NULL;
2801 }
2802 q->tx_waiting = 0;
2803 virtio_del_queue(vdev, index * 2 + 1);
2804 }
2805
2806 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2807 {
2808 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2809 int old_num_queues = virtio_get_num_queues(vdev);
2810 int new_num_queues = new_max_queue_pairs * 2 + 1;
2811 int i;
2812
2813 assert(old_num_queues >= 3);
2814 assert(old_num_queues % 2 == 1);
2815
2816 if (old_num_queues == new_num_queues) {
2817 return;
2818 }
2819
2820 /*
2821 * We always need to remove and add ctrl vq if
2822 * old_num_queues != new_num_queues. Remove ctrl_vq first,
2823 * and then we only enter one of the following two loops.
2824 */
2825 virtio_del_queue(vdev, old_num_queues - 1);
2826
2827 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2828 /* new_num_queues < old_num_queues */
2829 virtio_net_del_queue(n, i / 2);
2830 }
2831
2832 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2833 /* new_num_queues > old_num_queues */
2834 virtio_net_add_queue(n, i / 2);
2835 }
2836
2837 /* add ctrl_vq last */
2838 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2839 }
2840
2841 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2842 {
2843 int max = multiqueue ? n->max_queue_pairs : 1;
2844
2845 n->multiqueue = multiqueue;
2846 virtio_net_change_num_queue_pairs(n, max);
2847
2848 virtio_net_set_queue_pairs(n);
2849 }
2850
2851 static int virtio_net_post_load_device(void *opaque, int version_id)
2852 {
2853 VirtIONet *n = opaque;
2854 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2855 int i, link_down;
2856
2857 trace_virtio_net_post_load_device();
2858 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2859 virtio_vdev_has_feature(vdev,
2860 VIRTIO_F_VERSION_1),
2861 virtio_vdev_has_feature(vdev,
2862 VIRTIO_NET_F_HASH_REPORT));
2863
2864 /* MAC_TABLE_ENTRIES may be different from the saved image */
2865 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2866 n->mac_table.in_use = 0;
2867 }
2868
2869 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2870 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2871 }
2872
2873 /*
2874 * curr_guest_offloads will be later overwritten by the
2875 * virtio_set_features_nocheck call done from the virtio_load.
2876 * Here we make sure it is preserved and restored accordingly
2877 * in the virtio_net_post_load_virtio callback.
2878 */
2879 n->saved_guest_offloads = n->curr_guest_offloads;
2880
2881 virtio_net_set_queue_pairs(n);
2882
2883 /* Find the first multicast entry in the saved MAC filter */
2884 for (i = 0; i < n->mac_table.in_use; i++) {
2885 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2886 break;
2887 }
2888 }
2889 n->mac_table.first_multi = i;
2890
2891 /* nc.link_down can't be migrated, so infer link_down according
2892 * to link status bit in n->status */
2893 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2894 for (i = 0; i < n->max_queue_pairs; i++) {
2895 qemu_get_subqueue(n->nic, i)->link_down = link_down;
2896 }
2897
2898 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2899 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2900 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2901 QEMU_CLOCK_VIRTUAL,
2902 virtio_net_announce_timer, n);
2903 if (n->announce_timer.round) {
2904 timer_mod(n->announce_timer.tm,
2905 qemu_clock_get_ms(n->announce_timer.type));
2906 } else {
2907 qemu_announce_timer_del(&n->announce_timer, false);
2908 }
2909 }
2910
2911 if (n->rss_data.enabled) {
2912 n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
2913 if (!n->rss_data.populate_hash) {
2914 if (!virtio_net_attach_epbf_rss(n)) {
2915 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
2916 warn_report("Can't post-load eBPF RSS for vhost");
2917 } else {
2918 warn_report("Can't post-load eBPF RSS - "
2919 "fallback to software RSS");
2920 n->rss_data.enabled_software_rss = true;
2921 }
2922 }
2923 }
2924
2925 trace_virtio_net_rss_enable(n->rss_data.hash_types,
2926 n->rss_data.indirections_len,
2927 sizeof(n->rss_data.key));
2928 } else {
2929 trace_virtio_net_rss_disable();
2930 }
2931 return 0;
2932 }
2933
2934 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2935 {
2936 VirtIONet *n = VIRTIO_NET(vdev);
2937 /*
2938 * The actual needed state is now in saved_guest_offloads,
2939 * see virtio_net_post_load_device for detail.
2940 * Restore it back and apply the desired offloads.
2941 */
2942 n->curr_guest_offloads = n->saved_guest_offloads;
2943 if (peer_has_vnet_hdr(n)) {
2944 virtio_net_apply_guest_offloads(n);
2945 }
2946
2947 return 0;
2948 }
2949
2950 /* tx_waiting field of a VirtIONetQueue */
2951 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2952 .name = "virtio-net-queue-tx_waiting",
2953 .fields = (VMStateField[]) {
2954 VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2955 VMSTATE_END_OF_LIST()
2956 },
2957 };
2958
2959 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
2960 {
2961 return VIRTIO_NET(opaque)->max_queue_pairs > 1;
2962 }
2963
2964 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2965 {
2966 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2967 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2968 }
2969
2970 static bool mac_table_fits(void *opaque, int version_id)
2971 {
2972 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2973 }
2974
2975 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2976 {
2977 return !mac_table_fits(opaque, version_id);
2978 }
2979
2980 /* This temporary type is shared by all the WITH_TMP methods
2981 * although only some fields are used by each.
2982 */
2983 struct VirtIONetMigTmp {
2984 VirtIONet *parent;
2985 VirtIONetQueue *vqs_1;
2986 uint16_t curr_queue_pairs_1;
2987 uint8_t has_ufo;
2988 uint32_t has_vnet_hdr;
2989 };
2990
2991 /* The 2nd and subsequent tx_waiting flags are loaded later than
2992 * the 1st entry in the queue_pairs and only if there's more than one
2993 * entry. We use the tmp mechanism to compute a temporary
2994 * pointer and count, and also to validate the count.
2995 */
2996
2997 static int virtio_net_tx_waiting_pre_save(void *opaque)
2998 {
2999 struct VirtIONetMigTmp *tmp = opaque;
3000
3001 tmp->vqs_1 = tmp->parent->vqs + 1;
3002 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3003 if (tmp->parent->curr_queue_pairs == 0) {
3004 tmp->curr_queue_pairs_1 = 0;
3005 }
3006
3007 return 0;
3008 }
3009
3010 static int virtio_net_tx_waiting_pre_load(void *opaque)
3011 {
3012 struct VirtIONetMigTmp *tmp = opaque;
3013
3014 /* Reuse the pointer setup from save */
3015 virtio_net_tx_waiting_pre_save(opaque);
3016
3017 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3018 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3019 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3020
3021 return -EINVAL;
3022 }
3023
3024 return 0; /* all good */
3025 }
3026
3027 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3028 .name = "virtio-net-tx_waiting",
3029 .pre_load = virtio_net_tx_waiting_pre_load,
3030 .pre_save = virtio_net_tx_waiting_pre_save,
3031 .fields = (VMStateField[]) {
3032 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3033 curr_queue_pairs_1,
3034 vmstate_virtio_net_queue_tx_waiting,
3035 struct VirtIONetQueue),
3036 VMSTATE_END_OF_LIST()
3037 },
3038 };
3039
3040 /* the 'has_ufo' flag is only tested; if the incoming stream has the
3041 * flag set, we need to check that we support UFO as well
3042 */
3043 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3044 {
3045 struct VirtIONetMigTmp *tmp = opaque;
3046
3047 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3048 error_report("virtio-net: saved image requires TUN_F_UFO support");
3049 return -EINVAL;
3050 }
3051
3052 return 0;
3053 }
3054
3055 static int virtio_net_ufo_pre_save(void *opaque)
3056 {
3057 struct VirtIONetMigTmp *tmp = opaque;
3058
3059 tmp->has_ufo = tmp->parent->has_ufo;
3060
3061 return 0;
3062 }
3063
3064 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3065 .name = "virtio-net-ufo",
3066 .post_load = virtio_net_ufo_post_load,
3067 .pre_save = virtio_net_ufo_pre_save,
3068 .fields = (VMStateField[]) {
3069 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3070 VMSTATE_END_OF_LIST()
3071 },
3072 };
3073
3074 /* the 'has_vnet_hdr' flag is only tested; if the incoming stream has
3075 * the flag set, we need to check that we support vnet headers as well
3076 */
3077 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3078 {
3079 struct VirtIONetMigTmp *tmp = opaque;
3080
3081 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3082 error_report("virtio-net: saved image requires vnet_hdr=on");
3083 return -EINVAL;
3084 }
3085
3086 return 0;
3087 }
3088
3089 static int virtio_net_vnet_pre_save(void *opaque)
3090 {
3091 struct VirtIONetMigTmp *tmp = opaque;
3092
3093 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3094
3095 return 0;
3096 }
3097
3098 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3099 .name = "virtio-net-vnet",
3100 .post_load = virtio_net_vnet_post_load,
3101 .pre_save = virtio_net_vnet_pre_save,
3102 .fields = (VMStateField[]) {
3103 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3104 VMSTATE_END_OF_LIST()
3105 },
3106 };
3107
3108 static bool virtio_net_rss_needed(void *opaque)
3109 {
3110 return VIRTIO_NET(opaque)->rss_data.enabled;
3111 }
3112
3113 static const VMStateDescription vmstate_virtio_net_rss = {
3114 .name = "virtio-net-device/rss",
3115 .version_id = 1,
3116 .minimum_version_id = 1,
3117 .needed = virtio_net_rss_needed,
3118 .fields = (VMStateField[]) {
3119 VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3120 VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3121 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3122 VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3123 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3124 VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3125 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3126 VIRTIO_NET_RSS_MAX_KEY_SIZE),
3127 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3128 rss_data.indirections_len, 0,
3129 vmstate_info_uint16, uint16_t),
3130 VMSTATE_END_OF_LIST()
3131 },
3132 };
3133
3134 static const VMStateDescription vmstate_virtio_net_device = {
3135 .name = "virtio-net-device",
3136 .version_id = VIRTIO_NET_VM_VERSION,
3137 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3138 .post_load = virtio_net_post_load_device,
3139 .fields = (VMStateField[]) {
3140 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3141 VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3142 vmstate_virtio_net_queue_tx_waiting,
3143 VirtIONetQueue),
3144 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3145 VMSTATE_UINT16(status, VirtIONet),
3146 VMSTATE_UINT8(promisc, VirtIONet),
3147 VMSTATE_UINT8(allmulti, VirtIONet),
3148 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3149
3150 /* Guarded pair: If it fits we load it, else we throw it away
3151 * - this can happen if the source has a larger MAC table; post-load
3152 * sets flags in this case.
3153 */
3154 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3155 0, mac_table_fits, mac_table.in_use,
3156 ETH_ALEN),
3157 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3158 mac_table.in_use, ETH_ALEN),
3159
3160 /* Note: This is an array of uint32's that's always been saved as a
3161 * buffer; hold onto your endiannesses; it's actually used as a bitmap
3162 * stored in those uint32 words.
3163 */
3164 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3165 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3166 vmstate_virtio_net_has_vnet),
3167 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3168 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3169 VMSTATE_UINT8(alluni, VirtIONet),
3170 VMSTATE_UINT8(nomulti, VirtIONet),
3171 VMSTATE_UINT8(nouni, VirtIONet),
3172 VMSTATE_UINT8(nobcast, VirtIONet),
3173 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3174 vmstate_virtio_net_has_ufo),
3175 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3176 vmstate_info_uint16_equal, uint16_t),
3177 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3178 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3179 vmstate_virtio_net_tx_waiting),
3180 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3181 has_ctrl_guest_offloads),
3182 VMSTATE_END_OF_LIST()
3183 },
3184 .subsections = (const VMStateDescription * []) {
3185 &vmstate_virtio_net_rss,
3186 NULL
3187 }
3188 };
3189
3190 static NetClientInfo net_virtio_info = {
3191 .type = NET_CLIENT_DRIVER_NIC,
3192 .size = sizeof(NICState),
3193 .can_receive = virtio_net_can_receive,
3194 .receive = virtio_net_receive,
3195 .link_status_changed = virtio_net_set_link_status,
3196 .query_rx_filter = virtio_net_query_rxfilter,
3197 .announce = virtio_net_announce,
3198 };
3199
3200 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3201 {
3202 VirtIONet *n = VIRTIO_NET(vdev);
3203 NetClientState *nc;
3204 assert(n->vhost_started);
3205 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3206 /* Must guard against an invalid feature set and a bogus queue
3207 * index being set by a malicious guest, or slipping in through
3208 * a buggy migration stream.
3209 */
3210 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3211 qemu_log_mask(LOG_GUEST_ERROR,
3212 "%s: bogus vq index ignored\n", __func__);
3213 return false;
3214 }
3215 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3216 } else {
3217 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3218 }
3219 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3220 }
3221
3222 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3223 bool mask)
3224 {
3225 VirtIONet *n = VIRTIO_NET(vdev);
3226 NetClientState *nc;
3227 assert(n->vhost_started);
3228 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3229 /* Must guard against an invalid feature set and a bogus queue
3230 * index being set by a malicious guest, or slipping in through
3231 * a buggy migration stream.
3232 */
3233 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3234 qemu_log_mask(LOG_GUEST_ERROR,
3235 "%s: bogus vq index ignored\n", __func__);
3236 return;
3237 }
3238 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3239 } else {
3240 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3241 }
3242 vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3243 vdev, idx, mask);
3244 }
3245
3246 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3247 {
3248 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3249
3250 n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3251 }
3252
3253 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3254 const char *type)
3255 {
3256 /*
3257 * The name can be NULL; in that case the netclient name will be type.x.
3258 */
3259 assert(type != NULL);
3260
3261 g_free(n->netclient_name);
3262 g_free(n->netclient_type);
3263 n->netclient_name = g_strdup(name);
3264 n->netclient_type = g_strdup(type);
3265 }
3266
3267 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3268 {
3269 HotplugHandler *hotplug_ctrl;
3270 PCIDevice *pci_dev;
3271 Error *err = NULL;
3272
3273 hotplug_ctrl = qdev_get_hotplug_handler(dev);
3274 if (hotplug_ctrl) {
3275 pci_dev = PCI_DEVICE(dev);
3276 pci_dev->partially_hotplugged = true;
3277 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3278 if (err) {
3279 error_report_err(err);
3280 return false;
3281 }
3282 } else {
3283 return false;
3284 }
3285 return true;
3286 }
3287
3288 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3289 Error **errp)
3290 {
3291 Error *err = NULL;
3292 HotplugHandler *hotplug_ctrl;
3293 PCIDevice *pdev = PCI_DEVICE(dev);
3294 BusState *primary_bus;
3295
3296 if (!pdev->partially_hotplugged) {
3297 return true;
3298 }
3299 primary_bus = dev->parent_bus;
3300 if (!primary_bus) {
3301 error_setg(errp, "virtio_net: couldn't find primary bus");
3302 return false;
3303 }
3304 qdev_set_parent_bus(dev, primary_bus, &error_abort);
3305 qatomic_set(&n->failover_primary_hidden, false);
3306 hotplug_ctrl = qdev_get_hotplug_handler(dev);
3307 if (hotplug_ctrl) {
3308 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3309 if (err) {
3310 goto out;
3311 }
3312 hotplug_handler_plug(hotplug_ctrl, dev, &err);
3313 }
3314 pdev->partially_hotplugged = false;
3315
3316 out:
3317 error_propagate(errp, err);
3318 return !err;
3319 }
3320
3321 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3322 {
3323 bool should_be_hidden;
3324 Error *err = NULL;
3325 DeviceState *dev = failover_find_primary_device(n);
3326
3327 if (!dev) {
3328 return;
3329 }
3330
3331 should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3332
3333 if (migration_in_setup(s) && !should_be_hidden) {
3334 if (failover_unplug_primary(n, dev)) {
3335 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3336 qapi_event_send_unplug_primary(dev->id);
3337 qatomic_set(&n->failover_primary_hidden, true);
3338 } else {
3339 warn_report("couldn't unplug primary device");
3340 }
3341 } else if (migration_has_failed(s)) {
3342 /* We already unplugged the device; let's plug it back */
3343 if (!failover_replug_primary(n, dev, &err)) {
3344 if (err) {
3345 error_report_err(err);
3346 }
3347 }
3348 }
3349 }
3350
3351 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3352 {
3353 MigrationState *s = data;
3354 VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3355 virtio_net_handle_migration_primary(n, s);
3356 }
3357
3358 static bool failover_hide_primary_device(DeviceListener *listener,
3359 const QDict *device_opts,
3360 bool from_json,
3361 Error **errp)
3362 {
3363 VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3364 const char *standby_id;
3365
3366 if (!device_opts) {
3367 return false;
3368 }
3369
3370 if (!qdict_haskey(device_opts, "failover_pair_id")) {
3371 return false;
3372 }
3373
3374 if (!qdict_haskey(device_opts, "id")) {
3375 error_setg(errp, "Device with failover_pair_id needs to have id");
3376 return false;
3377 }
3378
3379 standby_id = qdict_get_str(device_opts, "failover_pair_id");
3380 if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3381 return false;
3382 }
3383
3384 /*
3385 * The hide helper can be called several times for a given device.
3386 * Check that there is only one primary for a virtio-net device, but
3387 * don't duplicate the qdict if the helper is called again for the
3388 * same device.
3389 */
3390 if (n->primary_opts) {
3391 const char *old, *new;
3392 /* devices with failover_pair_id always have an id */
3393 old = qdict_get_str(n->primary_opts, "id");
3394 new = qdict_get_str(device_opts, "id");
3395 if (strcmp(old, new) != 0) {
3396 error_setg(errp, "Cannot attach more than one primary device to "
3397 "'%s': '%s' and '%s'", n->netclient_name, old, new);
3398 return false;
3399 }
3400 } else {
3401 n->primary_opts = qdict_clone_shallow(device_opts);
3402 n->primary_opts_from_json = from_json;
3403 }
3404
3405 /* failover_primary_hidden is set during feature negotiation */
3406 return qatomic_read(&n->failover_primary_hidden);
3407 }
3408
3409 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3410 {
3411 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3412 VirtIONet *n = VIRTIO_NET(dev);
3413 NetClientState *nc;
3414 int i;
3415
3416 if (n->net_conf.mtu) {
3417 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3418 }
3419
3420 if (n->net_conf.duplex_str) {
3421 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3422 n->net_conf.duplex = DUPLEX_HALF;
3423 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3424 n->net_conf.duplex = DUPLEX_FULL;
3425 } else {
3426 error_setg(errp, "'duplex' must be 'half' or 'full'");
3427 return;
3428 }
3429 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3430 } else {
3431 n->net_conf.duplex = DUPLEX_UNKNOWN;
3432 }
3433
3434 if (n->net_conf.speed < SPEED_UNKNOWN) {
3435 error_setg(errp, "'speed' must be between 0 and INT_MAX");
3436 return;
3437 }
3438 if (n->net_conf.speed >= 0) {
3439 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3440 }
3441
3442 if (n->failover) {
3443 n->primary_listener.hide_device = failover_hide_primary_device;
3444 qatomic_set(&n->failover_primary_hidden, true);
3445 device_listener_register(&n->primary_listener);
3446 n->migration_state.notify = virtio_net_migration_state_notifier;
3447 add_migration_state_change_notifier(&n->migration_state);
3448 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3449 }
3450
3451 virtio_net_set_config_size(n, n->host_features);
3452 virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3453
3454 /*
3455 * We set a lower limit on RX queue size to what it always was.
3456 * Guests that want a smaller ring can always resize it without
3457 * help from us (using virtio 1 and up).
3458 */
3459 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3460 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3461 !is_power_of_2(n->net_conf.rx_queue_size)) {
3462 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3463 "must be a power of 2 between %d and %d.",
3464 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3465 VIRTQUEUE_MAX_SIZE);
3466 virtio_cleanup(vdev);
3467 return;
3468 }
3469
3470 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3471 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3472 !is_power_of_2(n->net_conf.tx_queue_size)) {
3473 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3474 "must be a power of 2 between %d and %d",
3475 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3476 VIRTQUEUE_MAX_SIZE);
3477 virtio_cleanup(vdev);
3478 return;
3479 }
3480
3481 n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3482
3483 /*
3484 * Figure out the datapath queue pairs since the backend could
3485 * provide control queue via peers as well.
3486 */
3487 if (n->nic_conf.peers.queues) {
3488 for (i = 0; i < n->max_ncs; i++) {
3489 if (n->nic_conf.peers.ncs[i]->is_datapath) {
3490 ++n->max_queue_pairs;
3491 }
3492 }
3493 }
3494 n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3495
3496 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3497 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3498 "must be a positive integer less than %d.",
3499 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3500 virtio_cleanup(vdev);
3501 return;
3502 }
3503 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3504 n->curr_queue_pairs = 1;
3505 n->tx_timeout = n->net_conf.txtimer;
3506
3507 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3508 && strcmp(n->net_conf.tx, "bh")) {
3509 warn_report("virtio-net: "
3510 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3511 n->net_conf.tx);
3512 error_printf("Defaulting to \"bh\"");
3513 }
3514
3515 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3516 n->net_conf.tx_queue_size);
3517
3518 for (i = 0; i < n->max_queue_pairs; i++) {
3519 virtio_net_add_queue(n, i);
3520 }
3521
3522 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3523 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3524 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3525 n->status = VIRTIO_NET_S_LINK_UP;
3526 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3527 QEMU_CLOCK_VIRTUAL,
3528 virtio_net_announce_timer, n);
3529 n->announce_timer.round = 0;
3530
3531 if (n->netclient_type) {
3532 /*
3533 * This happens when virtio_net_set_netclient_name has been called.
3534 */
3535 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3536 n->netclient_type, n->netclient_name, n);
3537 } else {
3538 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3539 object_get_typename(OBJECT(dev)), dev->id, n);
3540 }
3541
3542 for (i = 0; i < n->max_queue_pairs; i++) {
3543 n->nic->ncs[i].do_not_pad = true;
3544 }
3545
3546 peer_test_vnet_hdr(n);
3547 if (peer_has_vnet_hdr(n)) {
3548 for (i = 0; i < n->max_queue_pairs; i++) {
3549 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3550 }
3551 n->host_hdr_len = sizeof(struct virtio_net_hdr);
3552 } else {
3553 n->host_hdr_len = 0;
3554 }
3555
3556 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3557
3558 n->vqs[0].tx_waiting = 0;
3559 n->tx_burst = n->net_conf.txburst;
3560 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3561 n->promisc = 1; /* for compatibility */
3562
3563 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3564
3565 n->vlans = g_malloc0(MAX_VLAN >> 3);
3566
3567 nc = qemu_get_queue(n->nic);
3568 nc->rxfilter_notify_enabled = 1;
3569
3570 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3571 struct virtio_net_config netcfg = {};
3572 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3573 vhost_net_set_config(get_vhost_net(nc->peer),
3574 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3575 }
3576 QTAILQ_INIT(&n->rsc_chains);
3577 n->qdev = dev;
3578
3579 net_rx_pkt_init(&n->rx_pkt, false);
3580
3581 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3582 virtio_net_load_ebpf(n);
3583 }
3584 }
3585
3586 static void virtio_net_device_unrealize(DeviceState *dev)
3587 {
3588 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3589 VirtIONet *n = VIRTIO_NET(dev);
3590 int i, max_queue_pairs;
3591
3592 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3593 virtio_net_unload_ebpf(n);
3594 }
3595
3596 /* This will stop vhost backend if appropriate. */
3597 virtio_net_set_status(vdev, 0);
3598
3599 g_free(n->netclient_name);
3600 n->netclient_name = NULL;
3601 g_free(n->netclient_type);
3602 n->netclient_type = NULL;
3603
3604 g_free(n->mac_table.macs);
3605 g_free(n->vlans);
3606
3607 if (n->failover) {
3608 qobject_unref(n->primary_opts);
3609 device_listener_unregister(&n->primary_listener);
3610 remove_migration_state_change_notifier(&n->migration_state);
3611 } else {
3612 assert(n->primary_opts == NULL);
3613 }
3614
3615 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3616 for (i = 0; i < max_queue_pairs; i++) {
3617 virtio_net_del_queue(n, i);
3618 }
3619 /* also delete the control vq */
3620 virtio_del_queue(vdev, max_queue_pairs * 2);
3621 qemu_announce_timer_del(&n->announce_timer, false);
3622 g_free(n->vqs);
3623 qemu_del_nic(n->nic);
3624 virtio_net_rsc_cleanup(n);
3625 g_free(n->rss_data.indirections_table);
3626 net_rx_pkt_uninit(n->rx_pkt);
3627 virtio_cleanup(vdev);
3628 }
3629
3630 static void virtio_net_instance_init(Object *obj)
3631 {
3632 VirtIONet *n = VIRTIO_NET(obj);
3633
3634 /*
3635 * The default config_size is sizeof(struct virtio_net_config).
3636 * It can be overridden with virtio_net_set_config_size.
3637 */
3638 n->config_size = sizeof(struct virtio_net_config);
3639 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3640 "bootindex", "/ethernet-phy@0",
3641 DEVICE(n));
3642
3643 ebpf_rss_init(&n->ebpf_rss);
3644 }
3645
3646 static int virtio_net_pre_save(void *opaque)
3647 {
3648 VirtIONet *n = opaque;
3649
3650 /* At this point, backend must be stopped, otherwise
3651 * it might keep writing to memory. */
3652 assert(!n->vhost_started);
3653
3654 return 0;
3655 }
3656
3657 static bool primary_unplug_pending(void *opaque)
3658 {
3659 DeviceState *dev = opaque;
3660 DeviceState *primary;
3661 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3662 VirtIONet *n = VIRTIO_NET(vdev);
3663
3664 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3665 return false;
3666 }
3667 primary = failover_find_primary_device(n);
3668 return primary ? primary->pending_deleted_event : false;
3669 }
3670
3671 static bool dev_unplug_pending(void *opaque)
3672 {
3673 DeviceState *dev = opaque;
3674 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3675
3676 return vdc->primary_unplug_pending(dev);
3677 }
3678
3679 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3680 {
3681 VirtIONet *n = VIRTIO_NET(vdev);
3682 NetClientState *nc = qemu_get_queue(n->nic);
3683 struct vhost_net *net = get_vhost_net(nc->peer);
3684 return &net->dev;
3685 }
3686
3687 static const VMStateDescription vmstate_virtio_net = {
3688 .name = "virtio-net",
3689 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3690 .version_id = VIRTIO_NET_VM_VERSION,
3691 .fields = (VMStateField[]) {
3692 VMSTATE_VIRTIO_DEVICE,
3693 VMSTATE_END_OF_LIST()
3694 },
3695 .pre_save = virtio_net_pre_save,
3696 .dev_unplug_pending = dev_unplug_pending,
3697 };
3698
3699 static Property virtio_net_properties[] = {
3700 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3701 VIRTIO_NET_F_CSUM, true),
3702 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3703 VIRTIO_NET_F_GUEST_CSUM, true),
3704 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3705 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3706 VIRTIO_NET_F_GUEST_TSO4, true),
3707 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3708 VIRTIO_NET_F_GUEST_TSO6, true),
3709 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3710 VIRTIO_NET_F_GUEST_ECN, true),
3711 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3712 VIRTIO_NET_F_GUEST_UFO, true),
3713 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3714 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3715 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3716 VIRTIO_NET_F_HOST_TSO4, true),
3717 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3718 VIRTIO_NET_F_HOST_TSO6, true),
3719 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3720 VIRTIO_NET_F_HOST_ECN, true),
3721 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3722 VIRTIO_NET_F_HOST_UFO, true),
3723 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3724 VIRTIO_NET_F_MRG_RXBUF, true),
3725 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3726 VIRTIO_NET_F_STATUS, true),
3727 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3728 VIRTIO_NET_F_CTRL_VQ, true),
3729 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3730 VIRTIO_NET_F_CTRL_RX, true),
3731 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3732 VIRTIO_NET_F_CTRL_VLAN, true),
3733 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3734 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3735 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3736 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3737 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3738 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3739 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3740 DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3741 VIRTIO_NET_F_RSS, false),
3742 DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3743 VIRTIO_NET_F_HASH_REPORT, false),
3744 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3745 VIRTIO_NET_F_RSC_EXT, false),
3746 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3747 VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3748 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3749 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3750 TX_TIMER_INTERVAL),
3751 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3752 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3753 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3754 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3755 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3756 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3757 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3758 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3759 true),
3760 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3761 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3762 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3763 DEFINE_PROP_END_OF_LIST(),
3764 };
3765
3766 static void virtio_net_class_init(ObjectClass *klass, void *data)
3767 {
3768 DeviceClass *dc = DEVICE_CLASS(klass);
3769 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3770
3771 device_class_set_props(dc, virtio_net_properties);
3772 dc->vmsd = &vmstate_virtio_net;
3773 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3774 vdc->realize = virtio_net_device_realize;
3775 vdc->unrealize = virtio_net_device_unrealize;
3776 vdc->get_config = virtio_net_get_config;
3777 vdc->set_config = virtio_net_set_config;
3778 vdc->get_features = virtio_net_get_features;
3779 vdc->set_features = virtio_net_set_features;
3780 vdc->bad_features = virtio_net_bad_features;
3781 vdc->reset = virtio_net_reset;
3782 vdc->set_status = virtio_net_set_status;
3783 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3784 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3785 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3786 vdc->post_load = virtio_net_post_load_virtio;
3787 vdc->vmsd = &vmstate_virtio_net_device;
3788 vdc->primary_unplug_pending = primary_unplug_pending;
3789 vdc->get_vhost = virtio_net_get_vhost;
3790 }
3791
3792 static const TypeInfo virtio_net_info = {
3793 .name = TYPE_VIRTIO_NET,
3794 .parent = TYPE_VIRTIO_DEVICE,
3795 .instance_size = sizeof(VirtIONet),
3796 .instance_init = virtio_net_instance_init,
3797 .class_init = virtio_net_class_init,
3798 };
3799
3800 static void virtio_register_types(void)
3801 {
3802 type_register_static(&virtio_net_info);
3803 }
3804
3805 type_init(virtio_register_types)