1 /*
2 * Virtio Network Device
3 *
4 * Copyright IBM, Corp. 2007
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 */
13
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "trace.h"
44 #include "monitor/qdev.h"
45 #include "hw/pci/pci_device.h"
46 #include "net_rx_pkt.h"
47 #include "hw/virtio/vhost.h"
48 #include "sysemu/qtest.h"
49
50 #define VIRTIO_NET_VM_VERSION 11
51
52 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
53
54 /* previously fixed value */
55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
57
58 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
61
62 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */
63
64 #define VIRTIO_NET_TCP_FLAG 0x3F
65 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000
66
67 /* IPv4 max payload, 16 bits in the header */
68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
70
72 /* header length value in the IP header, without options */
72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
73
74 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
76
77 /* Purge coalesced packets timer interval. This value affects performance
78 significantly and should be tuned carefully: '300000' (300us) is the
79 recommended value to pass the WHQL test, while '50000' can gain 2x netperf
80 throughput with tso/gso/gro 'off'. */
81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
82
83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
84 VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
85 VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
86 VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
87 VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
88 VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
89 VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
90 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
91 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92
93 static const VirtIOFeature feature_sizes[] = {
94 {.flags = 1ULL << VIRTIO_NET_F_MAC,
95 .end = endof(struct virtio_net_config, mac)},
96 {.flags = 1ULL << VIRTIO_NET_F_STATUS,
97 .end = endof(struct virtio_net_config, status)},
98 {.flags = 1ULL << VIRTIO_NET_F_MQ,
99 .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
100 {.flags = 1ULL << VIRTIO_NET_F_MTU,
101 .end = endof(struct virtio_net_config, mtu)},
102 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
103 .end = endof(struct virtio_net_config, duplex)},
104 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
105 .end = endof(struct virtio_net_config, supported_hash_types)},
106 {}
107 };
108
109 static const VirtIOConfigSizeParams cfg_size_params = {
110 .min_size = endof(struct virtio_net_config, mac),
111 .max_size = sizeof(struct virtio_net_config),
112 .feature_sizes = feature_sizes
113 };
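/*
 * Illustrative sketch, not part of the original file: how a
 * feature-conditional config size can be derived from feature_sizes[].
 * The real lookup is done elsewhere via virtio_get_config_size(); this
 * hypothetical helper only demonstrates the walk over the table above.
 */
static size_t example_net_config_size(uint64_t host_features)
{
    size_t size = cfg_size_params.min_size;
    const VirtIOFeature *f;

    /* grow the config up to the end of every negotiated field */
    for (f = feature_sizes; f->flags != 0; f++) {
        if (host_features & f->flags) {
            size = MAX(size, f->end);
        }
    }
    return MIN(size, cfg_size_params.max_size);
}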
114
115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
116 {
117 VirtIONet *n = qemu_get_nic_opaque(nc);
118
119 return &n->vqs[nc->queue_index];
120 }
121
122 static int vq2q(int queue_index)
123 {
124 return queue_index / 2;
125 }
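/*
 * Sketch for illustration (these helpers are not in the original file):
 * virtqueues come in rx/tx pairs, so vq2q() above maps virtqueue indices
 * 0,1 -> pair 0, 2,3 -> pair 1, and so on. The hypothetical inverses:
 */
static inline int example_q2rx_vq(int queue_pair_index)
{
    return queue_pair_index * 2;     /* even virtqueue indices are rx */
}

static inline int example_q2tx_vq(int queue_pair_index)
{
    return queue_pair_index * 2 + 1; /* odd virtqueue indices are tx */
}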
126
127 static void flush_or_purge_queued_packets(NetClientState *nc)
128 {
129 if (!nc->peer) {
130 return;
131 }
132
133 qemu_flush_or_purge_queued_packets(nc->peer, true);
134 assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
135 }
136
137 /* TODO
138 * - we could suppress RX interrupt if we were so inclined.
139 */
140
141 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
142 {
143 VirtIONet *n = VIRTIO_NET(vdev);
144 struct virtio_net_config netcfg;
145 NetClientState *nc = qemu_get_queue(n->nic);
146 static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
147
148 int ret = 0;
149 memset(&netcfg, 0, sizeof(struct virtio_net_config));
150 virtio_stw_p(vdev, &netcfg.status, n->status);
151 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
152 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
153 memcpy(netcfg.mac, n->mac, ETH_ALEN);
154 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
155 netcfg.duplex = n->net_conf.duplex;
156 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
157 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
158 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
159 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
160 virtio_stl_p(vdev, &netcfg.supported_hash_types,
161 VIRTIO_NET_RSS_SUPPORTED_HASHES);
162 memcpy(config, &netcfg, n->config_size);
163
164 /*
165 * Is this VDPA? No peer means not VDPA: there's no way to
166 * disconnect/reconnect a VDPA peer.
167 */
168 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
169 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
170 n->config_size);
171 if (ret == -1) {
172 return;
173 }
174
175 /*
176 * Some NIC/kernel combinations present 0 as the mac address. As that
177 * is not a legal address, try to proceed with the address from the
178 * QEMU command line in the hope that the address has been configured
179 * correctly elsewhere - just not reported by the device.
180 */
181 if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
182 info_report("Zero hardware mac address detected. Ignoring.");
183 memcpy(netcfg.mac, n->mac, ETH_ALEN);
184 }
185
186 netcfg.status |= virtio_tswap16(vdev,
187 n->status & VIRTIO_NET_S_ANNOUNCE);
188 memcpy(config, &netcfg, n->config_size);
189 }
190 }
191
192 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
193 {
194 VirtIONet *n = VIRTIO_NET(vdev);
195 struct virtio_net_config netcfg = {};
196 NetClientState *nc = qemu_get_queue(n->nic);
197
198 memcpy(&netcfg, config, n->config_size);
199
200 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
201 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
202 memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
203 memcpy(n->mac, netcfg.mac, ETH_ALEN);
204 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
205 }
206
207 /*
208 * Is this VDPA? No peer means not VDPA: there's no way to
209 * disconnect/reconnect a VDPA peer.
210 */
211 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
212 vhost_net_set_config(get_vhost_net(nc->peer),
213 (uint8_t *)&netcfg, 0, n->config_size,
214 VHOST_SET_CONFIG_TYPE_FRONTEND);
215 }
216 }
217
218 static bool virtio_net_started(VirtIONet *n, uint8_t status)
219 {
220 VirtIODevice *vdev = VIRTIO_DEVICE(n);
221 return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
222 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
223 }
224
225 static void virtio_net_announce_notify(VirtIONet *net)
226 {
227 VirtIODevice *vdev = VIRTIO_DEVICE(net);
228 trace_virtio_net_announce_notify();
229
230 net->status |= VIRTIO_NET_S_ANNOUNCE;
231 virtio_notify_config(vdev);
232 }
233
234 static void virtio_net_announce_timer(void *opaque)
235 {
236 VirtIONet *n = opaque;
237 trace_virtio_net_announce_timer(n->announce_timer.round);
238
239 n->announce_timer.round--;
240 virtio_net_announce_notify(n);
241 }
242
243 static void virtio_net_announce(NetClientState *nc)
244 {
245 VirtIONet *n = qemu_get_nic_opaque(nc);
246 VirtIODevice *vdev = VIRTIO_DEVICE(n);
247
248 /*
249 * Make sure the virtio migration announcement timer isn't running.
250 * If it is, let it trigger the announcement so that we do not cause
251 * confusion.
252 */
253 if (n->announce_timer.round) {
254 return;
255 }
256
257 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
258 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
259 virtio_net_announce_notify(n);
260 }
261 }
262
263 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
264 {
265 VirtIODevice *vdev = VIRTIO_DEVICE(n);
266 NetClientState *nc = qemu_get_queue(n->nic);
267 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
268 int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
269 n->max_ncs - n->max_queue_pairs : 0;
270
271 if (!get_vhost_net(nc->peer)) {
272 return;
273 }
274
275 if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
276 !!n->vhost_started) {
277 return;
278 }
279 if (!n->vhost_started) {
280 int r, i;
281
282 if (n->needs_vnet_hdr_swap) {
283 error_report("backend does not support %s vnet headers; "
284 "falling back on userspace virtio",
285 virtio_is_big_endian(vdev) ? "BE" : "LE");
286 return;
287 }
288
289 /* Any packets outstanding? Purge them to avoid touching rings
290 * when vhost is running.
291 */
292 for (i = 0; i < queue_pairs; i++) {
293 NetClientState *qnc = qemu_get_subqueue(n->nic, i);
294
295 /* Purge both directions: TX and RX. */
296 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
297 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
298 }
299
300 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
301 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
302 if (r < 0) {
303 error_report("%uBytes MTU not supported by the backend",
304 n->net_conf.mtu);
305
306 return;
307 }
308 }
309
310 n->vhost_started = 1;
311 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
312 if (r < 0) {
313 error_report("unable to start vhost net: %d: "
314 "falling back on userspace virtio", -r);
315 n->vhost_started = 0;
316 }
317 } else {
318 vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
319 n->vhost_started = 0;
320 }
321 }
322
323 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
324 NetClientState *peer,
325 bool enable)
326 {
327 if (virtio_is_big_endian(vdev)) {
328 return qemu_set_vnet_be(peer, enable);
329 } else {
330 return qemu_set_vnet_le(peer, enable);
331 }
332 }
333
334 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
335 int queue_pairs, bool enable)
336 {
337 int i;
338
339 for (i = 0; i < queue_pairs; i++) {
340 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
341 enable) {
342 while (--i >= 0) {
343 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
344 }
345
346 return true;
347 }
348 }
349
350 return false;
351 }
352
353 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
354 {
355 VirtIODevice *vdev = VIRTIO_DEVICE(n);
356 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
357
358 if (virtio_net_started(n, status)) {
359 /* Before using the device, we tell the network backend about the
360 * endianness to use when parsing vnet headers. If the backend
361 * can't do it, we fall back to fixing the headers in the core
362 * virtio-net code.
363 */
364 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
365 queue_pairs, true);
366 } else if (virtio_net_started(n, vdev->status)) {
367 /* After using the device, we need to reset the network backend to
368 * the default (guest native endianness), otherwise the guest may
369 * lose network connectivity if it is rebooted into a different
370 * endianness.
371 */
372 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
373 }
374 }
375
376 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
377 {
378 unsigned int dropped = virtqueue_drop_all(vq);
379 if (dropped) {
380 virtio_notify(vdev, vq);
381 }
382 }
383
384 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
385 {
386 VirtIONet *n = VIRTIO_NET(vdev);
387 VirtIONetQueue *q;
388 int i;
389 uint8_t queue_status;
390
391 virtio_net_vnet_endian_status(n, status);
392 virtio_net_vhost_status(n, status);
393
394 for (i = 0; i < n->max_queue_pairs; i++) {
395 NetClientState *ncs = qemu_get_subqueue(n->nic, i);
396 bool queue_started;
397 q = &n->vqs[i];
398
399 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
400 queue_status = 0;
401 } else {
402 queue_status = status;
403 }
404 queue_started =
405 virtio_net_started(n, queue_status) && !n->vhost_started;
406
407 if (queue_started) {
408 qemu_flush_queued_packets(ncs);
409 }
410
411 if (!q->tx_waiting) {
412 continue;
413 }
414
415 if (queue_started) {
416 if (q->tx_timer) {
417 timer_mod(q->tx_timer,
418 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
419 } else {
420 qemu_bh_schedule(q->tx_bh);
421 }
422 } else {
423 if (q->tx_timer) {
424 timer_del(q->tx_timer);
425 } else {
426 qemu_bh_cancel(q->tx_bh);
427 }
428 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
429 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
430 vdev->vm_running) {
431 /* if tx is waiting, we likely have some packets in the tx queue
432 * and notification disabled */
433 q->tx_waiting = 0;
434 virtio_queue_set_notification(q->tx_vq, 1);
435 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
436 }
437 }
438 }
439 }
440
441 static void virtio_net_set_link_status(NetClientState *nc)
442 {
443 VirtIONet *n = qemu_get_nic_opaque(nc);
444 VirtIODevice *vdev = VIRTIO_DEVICE(n);
445 uint16_t old_status = n->status;
446
447 if (nc->link_down)
448 n->status &= ~VIRTIO_NET_S_LINK_UP;
449 else
450 n->status |= VIRTIO_NET_S_LINK_UP;
451
452 if (n->status != old_status)
453 virtio_notify_config(vdev);
454
455 virtio_net_set_status(vdev, vdev->status);
456 }
457
458 static void rxfilter_notify(NetClientState *nc)
459 {
460 VirtIONet *n = qemu_get_nic_opaque(nc);
461
462 if (nc->rxfilter_notify_enabled) {
463 char *path = object_get_canonical_path(OBJECT(n->qdev));
464 qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
465 g_free(path);
466
467 /* disable event notification to avoid event flooding */
468 nc->rxfilter_notify_enabled = 0;
469 }
470 }
471
472 static intList *get_vlan_table(VirtIONet *n)
473 {
474 intList *list;
475 int i, j;
476
477 list = NULL;
478 for (i = 0; i < MAX_VLAN >> 5; i++) {
479 for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
480 if (n->vlans[i] & (1U << j)) {
481 QAPI_LIST_PREPEND(list, (i << 5) + j);
482 }
483 }
484 }
485
486 return list;
487 }
488
489 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
490 {
491 VirtIONet *n = qemu_get_nic_opaque(nc);
492 VirtIODevice *vdev = VIRTIO_DEVICE(n);
493 RxFilterInfo *info;
494 strList *str_list;
495 int i;
496
497 info = g_malloc0(sizeof(*info));
498 info->name = g_strdup(nc->name);
499 info->promiscuous = n->promisc;
500
501 if (n->nouni) {
502 info->unicast = RX_STATE_NONE;
503 } else if (n->alluni) {
504 info->unicast = RX_STATE_ALL;
505 } else {
506 info->unicast = RX_STATE_NORMAL;
507 }
508
509 if (n->nomulti) {
510 info->multicast = RX_STATE_NONE;
511 } else if (n->allmulti) {
512 info->multicast = RX_STATE_ALL;
513 } else {
514 info->multicast = RX_STATE_NORMAL;
515 }
516
517 info->broadcast_allowed = n->nobcast;
518 info->multicast_overflow = n->mac_table.multi_overflow;
519 info->unicast_overflow = n->mac_table.uni_overflow;
520
521 info->main_mac = qemu_mac_strdup_printf(n->mac);
522
523 str_list = NULL;
524 for (i = 0; i < n->mac_table.first_multi; i++) {
525 QAPI_LIST_PREPEND(str_list,
526 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
527 }
528 info->unicast_table = str_list;
529
530 str_list = NULL;
531 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
532 QAPI_LIST_PREPEND(str_list,
533 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
534 }
535 info->multicast_table = str_list;
536 info->vlan_table = get_vlan_table(n);
537
538 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
539 info->vlan = RX_STATE_ALL;
540 } else if (!info->vlan_table) {
541 info->vlan = RX_STATE_NONE;
542 } else {
543 info->vlan = RX_STATE_NORMAL;
544 }
545
546 /* enable event notification after query */
547 nc->rxfilter_notify_enabled = 1;
548
549 return info;
550 }
551
552 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
553 {
554 VirtIONet *n = VIRTIO_NET(vdev);
555 NetClientState *nc;
556
557 /* validate queue_index and skip for cvq */
558 if (queue_index >= n->max_queue_pairs * 2) {
559 return;
560 }
561
562 nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
563
564 if (!nc->peer) {
565 return;
566 }
567
568 if (get_vhost_net(nc->peer) &&
569 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
570 vhost_net_virtqueue_reset(vdev, nc, queue_index);
571 }
572
573 flush_or_purge_queued_packets(nc);
574 }
575
576 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
577 {
578 VirtIONet *n = VIRTIO_NET(vdev);
579 NetClientState *nc;
580 int r;
581
582 /* validate queue_index and skip for cvq */
583 if (queue_index >= n->max_queue_pairs * 2) {
584 return;
585 }
586
587 nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
588
589 if (!nc->peer || !vdev->vhost_started) {
590 return;
591 }
592
593 if (get_vhost_net(nc->peer) &&
594 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
595 r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
596 if (r < 0) {
597 error_report("unable to restart vhost net virtqueue: %d, "
598 "when resetting the queue", queue_index);
599 }
600 }
601 }
602
603 static void virtio_net_reset(VirtIODevice *vdev)
604 {
605 VirtIONet *n = VIRTIO_NET(vdev);
606 int i;
607
608 /* Reset back to compatibility mode */
609 n->promisc = 1;
610 n->allmulti = 0;
611 n->alluni = 0;
612 n->nomulti = 0;
613 n->nouni = 0;
614 n->nobcast = 0;
615 /* multiqueue is disabled by default */
616 n->curr_queue_pairs = 1;
617 timer_del(n->announce_timer.tm);
618 n->announce_timer.round = 0;
619 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
620
621 /* Flush any MAC and VLAN filter table state */
622 n->mac_table.in_use = 0;
623 n->mac_table.first_multi = 0;
624 n->mac_table.multi_overflow = 0;
625 n->mac_table.uni_overflow = 0;
626 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
627 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
628 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
629 memset(n->vlans, 0, MAX_VLAN >> 3);
630
631 /* Flush any async TX */
632 for (i = 0; i < n->max_queue_pairs; i++) {
633 flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
634 }
635 }
636
637 static void peer_test_vnet_hdr(VirtIONet *n)
638 {
639 NetClientState *nc = qemu_get_queue(n->nic);
640 if (!nc->peer) {
641 return;
642 }
643
644 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
645 }
646
647 static int peer_has_vnet_hdr(VirtIONet *n)
648 {
649 return n->has_vnet_hdr;
650 }
651
652 static int peer_has_ufo(VirtIONet *n)
653 {
654 if (!peer_has_vnet_hdr(n))
655 return 0;
656
657 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
658
659 return n->has_ufo;
660 }
661
662 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
663 int version_1, int hash_report)
664 {
665 int i;
666 NetClientState *nc;
667
668 n->mergeable_rx_bufs = mergeable_rx_bufs;
669
670 if (version_1) {
671 n->guest_hdr_len = hash_report ?
672 sizeof(struct virtio_net_hdr_v1_hash) :
673 sizeof(struct virtio_net_hdr_mrg_rxbuf);
674 n->rss_data.populate_hash = !!hash_report;
675 } else {
676 n->guest_hdr_len = n->mergeable_rx_bufs ?
677 sizeof(struct virtio_net_hdr_mrg_rxbuf) :
678 sizeof(struct virtio_net_hdr);
679 }
680
681 for (i = 0; i < n->max_queue_pairs; i++) {
682 nc = qemu_get_subqueue(n->nic, i);
683
684 if (peer_has_vnet_hdr(n) &&
685 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
686 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
687 n->host_hdr_len = n->guest_hdr_len;
688 }
689 }
690 }
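/*
 * For reference (sizes assume the standard virtio header layouts, and are
 * noted here for illustration only):
 *   struct virtio_net_hdr           10 bytes (legacy, no mergeable buffers)
 *   struct virtio_net_hdr_mrg_rxbuf 12 bytes (adds the num_buffers field)
 *   struct virtio_net_hdr_v1_hash   20 bytes (adds hash value/report fields)
 */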
691
692 static int virtio_net_max_tx_queue_size(VirtIONet *n)
693 {
694 NetClientState *peer = n->nic_conf.peers.ncs[0];
695
696 /*
697 * Backends other than vhost-user or vhost-vdpa don't support a
698 * configurable max queue size.
699 */
700 if (!peer) {
701 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
702 }
703
704 switch (peer->info->type) {
705 case NET_CLIENT_DRIVER_VHOST_USER:
706 case NET_CLIENT_DRIVER_VHOST_VDPA:
707 return VIRTQUEUE_MAX_SIZE;
708 default:
709 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
710 }
711 }
712
713 static int peer_attach(VirtIONet *n, int index)
714 {
715 NetClientState *nc = qemu_get_subqueue(n->nic, index);
716
717 if (!nc->peer) {
718 return 0;
719 }
720
721 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
722 vhost_set_vring_enable(nc->peer, 1);
723 }
724
725 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
726 return 0;
727 }
728
729 if (n->max_queue_pairs == 1) {
730 return 0;
731 }
732
733 return tap_enable(nc->peer);
734 }
735
736 static int peer_detach(VirtIONet *n, int index)
737 {
738 NetClientState *nc = qemu_get_subqueue(n->nic, index);
739
740 if (!nc->peer) {
741 return 0;
742 }
743
744 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
745 vhost_set_vring_enable(nc->peer, 0);
746 }
747
748 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
749 return 0;
750 }
751
752 return tap_disable(nc->peer);
753 }
754
755 static void virtio_net_set_queue_pairs(VirtIONet *n)
756 {
757 int i;
758 int r;
759
760 if (n->nic->peer_deleted) {
761 return;
762 }
763
764 for (i = 0; i < n->max_queue_pairs; i++) {
765 if (i < n->curr_queue_pairs) {
766 r = peer_attach(n, i);
767 assert(!r);
768 } else {
769 r = peer_detach(n, i);
770 assert(!r);
771 }
772 }
773 }
774
775 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
776
777 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
778 Error **errp)
779 {
780 VirtIONet *n = VIRTIO_NET(vdev);
781 NetClientState *nc = qemu_get_queue(n->nic);
782
783 /* First, sync all possible supported virtio-net features */
784 features |= n->host_features;
785
786 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
787
788 if (!peer_has_vnet_hdr(n)) {
789 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
790 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
791 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
792 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
793
794 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
795 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
796 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
797 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
798
799 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
800 }
801
802 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
803 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
804 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
805 }
806
807 if (!get_vhost_net(nc->peer)) {
808 return features;
809 }
810
811 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
812 virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
813 }
814 features = vhost_net_get_features(get_vhost_net(nc->peer), features);
815 vdev->backend_features = features;
816
817 if (n->mtu_bypass_backend &&
818 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
819 features |= (1ULL << VIRTIO_NET_F_MTU);
820 }
821
822 /*
823 * Since GUEST_ANNOUNCE is emulated, the feature bit could be set even
824 * when the backend has not enabled it. This happens in the vDPA case.
825 *
826 * Make sure the feature set is not incoherent, as the driver could refuse
827 * to start.
828 *
829 * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
830 * helping the guest to notify its new location with vDPA devices that
831 * do not support it.
832 */
833 if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
834 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
835 }
836
837 return features;
838 }
839
840 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
841 {
842 uint64_t features = 0;
843
844 /* Linux kernel 2.6.25. It understood MAC (as everyone must),
845 * but also these: */
846 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
847 virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
848 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
849 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
850 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
851
852 return features;
853 }
854
855 static void virtio_net_apply_guest_offloads(VirtIONet *n)
856 {
857 qemu_set_offload(qemu_get_queue(n->nic)->peer,
858 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
859 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
860 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
861 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
862 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
863 }
864
865 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
866 {
867 static const uint64_t guest_offloads_mask =
868 (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
869 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
870 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
871 (1ULL << VIRTIO_NET_F_GUEST_ECN) |
872 (1ULL << VIRTIO_NET_F_GUEST_UFO);
873
874 return guest_offloads_mask & features;
875 }
876
877 uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
878 {
879 VirtIODevice *vdev = VIRTIO_DEVICE(n);
880 return virtio_net_guest_offloads_by_features(vdev->guest_features);
881 }
882
883 typedef struct {
884 VirtIONet *n;
885 DeviceState *dev;
886 } FailoverDevice;
887
888 /**
889 * Set the failover primary device
890 *
891 * @dev: candidate device to check
892 * @opaque: FailoverDevice to record the match in
893 * Returns 1 to stop the bus walk once the primary device is found.
894 */
895 static int failover_set_primary(DeviceState *dev, void *opaque)
896 {
897 FailoverDevice *fdev = opaque;
898 PCIDevice *pci_dev = (PCIDevice *)
899 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
900
901 if (!pci_dev) {
902 return 0;
903 }
904
905 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
906 fdev->dev = dev;
907 return 1;
908 }
909
910 return 0;
911 }
912
913 /**
914 * Find the primary device for this failover virtio-net
915 *
916 * @n: VirtIONet device
918 */
919 static DeviceState *failover_find_primary_device(VirtIONet *n)
920 {
921 FailoverDevice fdev = {
922 .n = n,
923 };
924
925 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
926 NULL, NULL, &fdev);
927 return fdev.dev;
928 }
929
930 static void failover_add_primary(VirtIONet *n, Error **errp)
931 {
932 Error *err = NULL;
933 DeviceState *dev = failover_find_primary_device(n);
934
935 if (dev) {
936 return;
937 }
938
939 if (!n->primary_opts) {
940 error_setg(errp, "Primary device not found");
941 error_append_hint(errp, "Virtio-net failover will not work. Make "
942 "sure primary device has parameter"
943 " failover_pair_id=%s\n", n->netclient_name);
944 return;
945 }
946
947 dev = qdev_device_add_from_qdict(n->primary_opts,
948 n->primary_opts_from_json,
949 &err);
950 if (err) {
951 qobject_unref(n->primary_opts);
952 n->primary_opts = NULL;
953 } else {
954 object_unref(OBJECT(dev));
955 }
956 error_propagate(errp, err);
957 }
958
959 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
960 {
961 VirtIONet *n = VIRTIO_NET(vdev);
962 Error *err = NULL;
963 int i;
964
965 if (n->mtu_bypass_backend &&
966 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
967 features &= ~(1ULL << VIRTIO_NET_F_MTU);
968 }
969
970 virtio_net_set_multiqueue(n,
971 virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
972 virtio_has_feature(features, VIRTIO_NET_F_MQ));
973
974 virtio_net_set_mrg_rx_bufs(n,
975 virtio_has_feature(features,
976 VIRTIO_NET_F_MRG_RXBUF),
977 virtio_has_feature(features,
978 VIRTIO_F_VERSION_1),
979 virtio_has_feature(features,
980 VIRTIO_NET_F_HASH_REPORT));
981
982 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
983 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
984 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
985 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
986 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
987
988 if (n->has_vnet_hdr) {
989 n->curr_guest_offloads =
990 virtio_net_guest_offloads_by_features(features);
991 virtio_net_apply_guest_offloads(n);
992 }
993
994 for (i = 0; i < n->max_queue_pairs; i++) {
995 NetClientState *nc = qemu_get_subqueue(n->nic, i);
996
997 if (!get_vhost_net(nc->peer)) {
998 continue;
999 }
1000 vhost_net_ack_features(get_vhost_net(nc->peer), features);
1001
1002 /*
1003 * keep acked_features in NetVhostUserState up-to-date so that it
1004 * doesn't miss any features configured by the guest virtio driver.
1005 */
1006 vhost_net_save_acked_features(nc->peer);
1007 }
1008
1009 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
1010 memset(n->vlans, 0, MAX_VLAN >> 3);
1011 } else {
1012 memset(n->vlans, 0xff, MAX_VLAN >> 3);
1013 }
1014
1015 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
1016 qapi_event_send_failover_negotiated(n->netclient_name);
1017 qatomic_set(&n->failover_primary_hidden, false);
1018 failover_add_primary(n, &err);
1019 if (err) {
1020 if (!qtest_enabled()) {
1021 warn_report_err(err);
1022 } else {
1023 error_free(err);
1024 }
1025 }
1026 }
1027 }
1028
1029 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
1030 struct iovec *iov, unsigned int iov_cnt)
1031 {
1032 uint8_t on;
1033 size_t s;
1034 NetClientState *nc = qemu_get_queue(n->nic);
1035
1036 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1037 if (s != sizeof(on)) {
1038 return VIRTIO_NET_ERR;
1039 }
1040
1041 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
1042 n->promisc = on;
1043 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
1044 n->allmulti = on;
1045 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
1046 n->alluni = on;
1047 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
1048 n->nomulti = on;
1049 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
1050 n->nouni = on;
1051 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
1052 n->nobcast = on;
1053 } else {
1054 return VIRTIO_NET_ERR;
1055 }
1056
1057 rxfilter_notify(nc);
1058
1059 return VIRTIO_NET_OK;
1060 }
1061
1062 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
1063 struct iovec *iov, unsigned int iov_cnt)
1064 {
1065 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1066 uint64_t offloads;
1067 size_t s;
1068
1069 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1070 return VIRTIO_NET_ERR;
1071 }
1072
1073 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1074 if (s != sizeof(offloads)) {
1075 return VIRTIO_NET_ERR;
1076 }
1077
1078 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1079 uint64_t supported_offloads;
1080
1081 offloads = virtio_ldq_p(vdev, &offloads);
1082
1083 if (!n->has_vnet_hdr) {
1084 return VIRTIO_NET_ERR;
1085 }
1086
1087 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1088 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1089 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1090 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1091 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1092
1093 supported_offloads = virtio_net_supported_guest_offloads(n);
1094 if (offloads & ~supported_offloads) {
1095 return VIRTIO_NET_ERR;
1096 }
1097
1098 n->curr_guest_offloads = offloads;
1099 virtio_net_apply_guest_offloads(n);
1100
1101 return VIRTIO_NET_OK;
1102 } else {
1103 return VIRTIO_NET_ERR;
1104 }
1105 }
1106
1107 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1108 struct iovec *iov, unsigned int iov_cnt)
1109 {
1110 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1111 struct virtio_net_ctrl_mac mac_data;
1112 size_t s;
1113 NetClientState *nc = qemu_get_queue(n->nic);
1114
1115 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1116 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1117 return VIRTIO_NET_ERR;
1118 }
1119 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1120 assert(s == sizeof(n->mac));
1121 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1122 rxfilter_notify(nc);
1123
1124 return VIRTIO_NET_OK;
1125 }
1126
1127 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1128 return VIRTIO_NET_ERR;
1129 }
1130
1131 int in_use = 0;
1132 int first_multi = 0;
1133 uint8_t uni_overflow = 0;
1134 uint8_t multi_overflow = 0;
1135 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1136
1137 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1138 sizeof(mac_data.entries));
1139 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1140 if (s != sizeof(mac_data.entries)) {
1141 goto error;
1142 }
1143 iov_discard_front(&iov, &iov_cnt, s);
1144
1145 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1146 goto error;
1147 }
1148
1149 if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1150 s = iov_to_buf(iov, iov_cnt, 0, macs,
1151 mac_data.entries * ETH_ALEN);
1152 if (s != mac_data.entries * ETH_ALEN) {
1153 goto error;
1154 }
1155 in_use += mac_data.entries;
1156 } else {
1157 uni_overflow = 1;
1158 }
1159
1160 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1161
1162 first_multi = in_use;
1163
1164 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1165 sizeof(mac_data.entries));
1166 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1167 if (s != sizeof(mac_data.entries)) {
1168 goto error;
1169 }
1170
1171 iov_discard_front(&iov, &iov_cnt, s);
1172
1173 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1174 goto error;
1175 }
1176
1177 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1178 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1179 mac_data.entries * ETH_ALEN);
1180 if (s != mac_data.entries * ETH_ALEN) {
1181 goto error;
1182 }
1183 in_use += mac_data.entries;
1184 } else {
1185 multi_overflow = 1;
1186 }
1187
1188 n->mac_table.in_use = in_use;
1189 n->mac_table.first_multi = first_multi;
1190 n->mac_table.uni_overflow = uni_overflow;
1191 n->mac_table.multi_overflow = multi_overflow;
1192 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1193 g_free(macs);
1194 rxfilter_notify(nc);
1195
1196 return VIRTIO_NET_OK;
1197
1198 error:
1199 g_free(macs);
1200 return VIRTIO_NET_ERR;
1201 }
1202
1203 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1204 struct iovec *iov, unsigned int iov_cnt)
1205 {
1206 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1207 uint16_t vid;
1208 size_t s;
1209 NetClientState *nc = qemu_get_queue(n->nic);
1210
1211 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1212 vid = virtio_lduw_p(vdev, &vid);
1213 if (s != sizeof(vid)) {
1214 return VIRTIO_NET_ERR;
1215 }
1216
1217 if (vid >= MAX_VLAN)
1218 return VIRTIO_NET_ERR;
1219
1220 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1221 n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1222 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1223 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1224 else
1225 return VIRTIO_NET_ERR;
1226
1227 rxfilter_notify(nc);
1228
1229 return VIRTIO_NET_OK;
1230 }
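/*
 * Illustration only (this helper is not in the original file): the VLAN
 * filter is a bitmap of MAX_VLAN bits packed into 32-bit words, so
 * vid >> 5 selects the word and vid & 0x1f the bit, as used above and
 * again in receive_filter() below.
 */
static inline bool example_vlan_is_allowed(const uint32_t *vlans, uint16_t vid)
{
    return vlans[vid >> 5] & (1U << (vid & 0x1f));
}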
1231
1232 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1233 struct iovec *iov, unsigned int iov_cnt)
1234 {
1235 trace_virtio_net_handle_announce(n->announce_timer.round);
1236 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1237 n->status & VIRTIO_NET_S_ANNOUNCE) {
1238 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1239 if (n->announce_timer.round) {
1240 qemu_announce_timer_step(&n->announce_timer);
1241 }
1242 return VIRTIO_NET_OK;
1243 } else {
1244 return VIRTIO_NET_ERR;
1245 }
1246 }
1247
1248 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1249
1250 static void virtio_net_disable_rss(VirtIONet *n)
1251 {
1252 if (n->rss_data.enabled) {
1253 trace_virtio_net_rss_disable();
1254 }
1255 n->rss_data.enabled = false;
1256
1257 virtio_net_detach_epbf_rss(n);
1258 }
1259
1260 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1261 {
1262 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1263 if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1264 return false;
1265 }
1266
1267 return nc->info->set_steering_ebpf(nc, prog_fd);
1268 }
1269
1270 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1271 struct EBPFRSSConfig *config)
1272 {
1273 config->redirect = data->redirect;
1274 config->populate_hash = data->populate_hash;
1275 config->hash_types = data->hash_types;
1276 config->indirections_len = data->indirections_len;
1277 config->default_queue = data->default_queue;
1278 }
1279
1280 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1281 {
1282 struct EBPFRSSConfig config = {};
1283
1284 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1285 return false;
1286 }
1287
1288 rss_data_to_rss_config(&n->rss_data, &config);
1289
1290 if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1291 n->rss_data.indirections_table, n->rss_data.key)) {
1292 return false;
1293 }
1294
1295 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1296 return false;
1297 }
1298
1299 return true;
1300 }
1301
1302 static void virtio_net_detach_epbf_rss(VirtIONet *n)
1303 {
1304 virtio_net_attach_ebpf_to_backend(n->nic, -1);
1305 }
1306
1307 static bool virtio_net_load_ebpf(VirtIONet *n)
1308 {
1309 if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1310 /* backend doesn't support steering eBPF */
1311 return false;
1312 }
1313
1314 return ebpf_rss_load(&n->ebpf_rss);
1315 }
1316
1317 static void virtio_net_unload_ebpf(VirtIONet *n)
1318 {
1319 virtio_net_attach_ebpf_to_backend(n->nic, -1);
1320 ebpf_rss_unload(&n->ebpf_rss);
1321 }
1322
1323 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1324 struct iovec *iov,
1325 unsigned int iov_cnt,
1326 bool do_rss)
1327 {
1328 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1329 struct virtio_net_rss_config cfg;
1330 size_t s, offset = 0, size_get;
1331 uint16_t queue_pairs, i;
1332 struct {
1333 uint16_t us;
1334 uint8_t b;
1335 } QEMU_PACKED temp;
1336 const char *err_msg = "";
1337 uint32_t err_value = 0;
1338
1339 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1340 err_msg = "RSS is not negotiated";
1341 goto error;
1342 }
1343 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1344 err_msg = "Hash report is not negotiated";
1345 goto error;
1346 }
1347 size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1348 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1349 if (s != size_get) {
1350 err_msg = "Short command buffer";
1351 err_value = (uint32_t)s;
1352 goto error;
1353 }
1354 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1355 n->rss_data.indirections_len =
1356 virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1357 n->rss_data.indirections_len++;
1358 if (!do_rss) {
1359 n->rss_data.indirections_len = 1;
1360 }
1361 if (!is_power_of_2(n->rss_data.indirections_len)) {
1362 err_msg = "Invalid size of indirection table";
1363 err_value = n->rss_data.indirections_len;
1364 goto error;
1365 }
1366 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1367 err_msg = "Too large indirection table";
1368 err_value = n->rss_data.indirections_len;
1369 goto error;
1370 }
1371 n->rss_data.default_queue = do_rss ?
1372 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1373 if (n->rss_data.default_queue >= n->max_queue_pairs) {
1374 err_msg = "Invalid default queue";
1375 err_value = n->rss_data.default_queue;
1376 goto error;
1377 }
1378 offset += size_get;
1379 size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1380 g_free(n->rss_data.indirections_table);
1381 n->rss_data.indirections_table = g_malloc(size_get);
1382 if (!n->rss_data.indirections_table) {
1383 err_msg = "Can't allocate indirections table";
1384 err_value = n->rss_data.indirections_len;
1385 goto error;
1386 }
1387 s = iov_to_buf(iov, iov_cnt, offset,
1388 n->rss_data.indirections_table, size_get);
1389 if (s != size_get) {
1390 err_msg = "Short indirection table buffer";
1391 err_value = (uint32_t)s;
1392 goto error;
1393 }
1394 for (i = 0; i < n->rss_data.indirections_len; ++i) {
1395 uint16_t val = n->rss_data.indirections_table[i];
1396 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1397 }
1398 offset += size_get;
1399 size_get = sizeof(temp);
1400 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1401 if (s != size_get) {
1402 err_msg = "Can't get queue_pairs";
1403 err_value = (uint32_t)s;
1404 goto error;
1405 }
1406 queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1407 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1408 err_msg = "Invalid number of queue_pairs";
1409 err_value = queue_pairs;
1410 goto error;
1411 }
1412 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1413 err_msg = "Invalid key size";
1414 err_value = temp.b;
1415 goto error;
1416 }
1417 if (!temp.b && n->rss_data.hash_types) {
1418 err_msg = "No key provided";
1419 err_value = 0;
1420 goto error;
1421 }
1422 if (!temp.b && !n->rss_data.hash_types) {
1423 virtio_net_disable_rss(n);
1424 return queue_pairs;
1425 }
1426 offset += size_get;
1427 size_get = temp.b;
1428 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1429 if (s != size_get) {
1430 err_msg = "Can get key buffer";
1431 err_value = (uint32_t)s;
1432 goto error;
1433 }
1434 n->rss_data.enabled = true;
1435
1436 if (!n->rss_data.populate_hash) {
1437 if (!virtio_net_attach_epbf_rss(n)) {
1438 /* EBPF must be loaded for vhost */
1439 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1440 warn_report("Can't load eBPF RSS for vhost");
1441 goto error;
1442 }
1443 /* fall back to software RSS */
1444 warn_report("Can't load eBPF RSS - falling back to software RSS");
1445 n->rss_data.enabled_software_rss = true;
1446 }
1447 } else {
1448 /* use software RSS to populate the hash */
1449 /* and detach eBPF if it was loaded before */
1450 virtio_net_detach_epbf_rss(n);
1451 n->rss_data.enabled_software_rss = true;
1452 }
1453
1454 trace_virtio_net_rss_enable(n->rss_data.hash_types,
1455 n->rss_data.indirections_len,
1456 temp.b);
1457 return queue_pairs;
1458 error:
1459 trace_virtio_net_rss_error(err_msg, err_value);
1460 virtio_net_disable_rss(n);
1461 return 0;
1462 }
1463
1464 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1465 struct iovec *iov, unsigned int iov_cnt)
1466 {
1467 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1468 uint16_t queue_pairs;
1469 NetClientState *nc = qemu_get_queue(n->nic);
1470
1471 virtio_net_disable_rss(n);
1472 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1473 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1474 return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1475 }
1476 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1477 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1478 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1479 struct virtio_net_ctrl_mq mq;
1480 size_t s;
1481 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1482 return VIRTIO_NET_ERR;
1483 }
1484 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1485 if (s != sizeof(mq)) {
1486 return VIRTIO_NET_ERR;
1487 }
1488 queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1489
1490 } else {
1491 return VIRTIO_NET_ERR;
1492 }
1493
1494 if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1495 queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1496 queue_pairs > n->max_queue_pairs ||
1497 !n->multiqueue) {
1498 return VIRTIO_NET_ERR;
1499 }
1500
1501 n->curr_queue_pairs = queue_pairs;
1502 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
1503 /*
1504 * Avoid updating the backend for a vdpa device: We're only interested
1505 * in updating the device model queues.
1506 */
1507 return VIRTIO_NET_OK;
1508 }
1509 /* stop the backend before changing the number of queue_pairs to avoid handling a
1510 * disabled queue */
1511 virtio_net_set_status(vdev, vdev->status);
1512 virtio_net_set_queue_pairs(n);
1513
1514 return VIRTIO_NET_OK;
1515 }
1516
1517 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1518 const struct iovec *in_sg, unsigned in_num,
1519 const struct iovec *out_sg,
1520 unsigned out_num)
1521 {
1522 VirtIONet *n = VIRTIO_NET(vdev);
1523 struct virtio_net_ctrl_hdr ctrl;
1524 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1525 size_t s;
1526 struct iovec *iov, *iov2;
1527
1528 if (iov_size(in_sg, in_num) < sizeof(status) ||
1529 iov_size(out_sg, out_num) < sizeof(ctrl)) {
1530 virtio_error(vdev, "virtio-net ctrl missing headers");
1531 return 0;
1532 }
1533
1534 iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1535 s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1536 iov_discard_front(&iov, &out_num, sizeof(ctrl));
1537 if (s != sizeof(ctrl)) {
1538 status = VIRTIO_NET_ERR;
1539 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1540 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1541 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1542 status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1543 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1544 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1545 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1546 status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1547 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1548 status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1549 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1550 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1551 }
1552
1553 s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1554 assert(s == sizeof(status));
1555
1556 g_free(iov2);
1557 return sizeof(status);
1558 }
1559
1560 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1561 {
1562 VirtQueueElement *elem;
1563
1564 for (;;) {
1565 size_t written;
1566 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1567 if (!elem) {
1568 break;
1569 }
1570
1571 written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1572 elem->out_sg, elem->out_num);
1573 if (written > 0) {
1574 virtqueue_push(vq, elem, written);
1575 virtio_notify(vdev, vq);
1576 g_free(elem);
1577 } else {
1578 virtqueue_detach_element(vq, elem, 0);
1579 g_free(elem);
1580 break;
1581 }
1582 }
1583 }
1584
1585 /* RX */
1586
1587 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1588 {
1589 VirtIONet *n = VIRTIO_NET(vdev);
1590 int queue_index = vq2q(virtio_get_queue_index(vq));
1591
1592 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1593 }
1594
1595 static bool virtio_net_can_receive(NetClientState *nc)
1596 {
1597 VirtIONet *n = qemu_get_nic_opaque(nc);
1598 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1599 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1600
1601 if (!vdev->vm_running) {
1602 return false;
1603 }
1604
1605 if (nc->queue_index >= n->curr_queue_pairs) {
1606 return false;
1607 }
1608
1609 if (!virtio_queue_ready(q->rx_vq) ||
1610 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1611 return false;
1612 }
1613
1614 return true;
1615 }
1616
1617 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1618 {
1619 VirtIONet *n = q->n;
1620 if (virtio_queue_empty(q->rx_vq) ||
1621 (n->mergeable_rx_bufs &&
1622 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1623 virtio_queue_set_notification(q->rx_vq, 1);
1624
1625 /* To avoid a race condition where the guest has made some buffers
1626 * available after the above check but before notification was
1627 * enabled, check for available buffers again.
1628 */
1629 if (virtio_queue_empty(q->rx_vq) ||
1630 (n->mergeable_rx_bufs &&
1631 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1632 return 0;
1633 }
1634 }
1635
1636 virtio_queue_set_notification(q->rx_vq, 0);
1637 return 1;
1638 }
1639
1640 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1641 {
1642 virtio_tswap16s(vdev, &hdr->hdr_len);
1643 virtio_tswap16s(vdev, &hdr->gso_size);
1644 virtio_tswap16s(vdev, &hdr->csum_start);
1645 virtio_tswap16s(vdev, &hdr->csum_offset);
1646 }
1647
1648 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1649 * it never finds out that the packets don't have valid checksums. This
1650 * causes dhclient to get upset. Fedora's carried a patch for ages to
1651 * fix this with Xen but it hasn't appeared in an upstream release of
1652 * dhclient yet.
1653 *
1654 * To avoid breaking existing guests, we catch udp packets and add
1655 * checksums. This is terrible but it's better than hacking the guest
1656 * kernels.
1657 *
1658 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1659 * we should provide a mechanism to disable it to avoid polluting the host
1660 * cache.
1661 */
1662 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1663 uint8_t *buf, size_t size)
1664 {
1665 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1666 (size > 27 && size < 1500) && /* normal sized MTU */
1667 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1668 (buf[23] == 17) && /* ip.protocol == UDP */
1669 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1670 net_checksum_calculate(buf, size, CSUM_UDP);
1671 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1672 }
1673 }
1674
1675 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1676 const void *buf, size_t size)
1677 {
1678 if (n->has_vnet_hdr) {
1679 /* FIXME this cast is evil */
1680 void *wbuf = (void *)buf;
1681 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1682 size - n->host_hdr_len);
1683
1684 if (n->needs_vnet_hdr_swap) {
1685 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1686 }
1687 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1688 } else {
1689 struct virtio_net_hdr hdr = {
1690 .flags = 0,
1691 .gso_type = VIRTIO_NET_HDR_GSO_NONE
1692 };
1693 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1694 }
1695 }
1696
1697 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1698 {
1699 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1700 static const uint8_t vlan[] = {0x81, 0x00};
1701 uint8_t *ptr = (uint8_t *)buf;
1702 int i;
1703
1704 if (n->promisc)
1705 return 1;
1706
1707 ptr += n->host_hdr_len;
1708
1709 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1710 int vid = lduw_be_p(ptr + 14) & 0xfff;
1711 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1712 return 0;
1713 }
1714
1715 if (ptr[0] & 1) { // multicast
1716 if (!memcmp(ptr, bcast, sizeof(bcast))) {
1717 return !n->nobcast;
1718 } else if (n->nomulti) {
1719 return 0;
1720 } else if (n->allmulti || n->mac_table.multi_overflow) {
1721 return 1;
1722 }
1723
1724 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1725 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1726 return 1;
1727 }
1728 }
1729 } else { // unicast
1730 if (n->nouni) {
1731 return 0;
1732 } else if (n->alluni || n->mac_table.uni_overflow) {
1733 return 1;
1734 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1735 return 1;
1736 }
1737
1738 for (i = 0; i < n->mac_table.first_multi; i++) {
1739 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1740 return 1;
1741 }
1742 }
1743 }
1744
1745 return 0;
1746 }
1747
1748 static uint8_t virtio_net_get_hash_type(bool hasip4,
1749 bool hasip6,
1750 EthL4HdrProto l4hdr_proto,
1751 uint32_t types)
1752 {
1753 if (hasip4) {
1754 switch (l4hdr_proto) {
1755 case ETH_L4_HDR_PROTO_TCP:
1756 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1757 return NetPktRssIpV4Tcp;
1758 }
1759 break;
1760
1761 case ETH_L4_HDR_PROTO_UDP:
1762 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1763 return NetPktRssIpV4Udp;
1764 }
1765 break;
1766
1767 default:
1768 break;
1769 }
1770
1771 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1772 return NetPktRssIpV4;
1773 }
1774 } else if (hasip6) {
1775 switch (l4hdr_proto) {
1776 case ETH_L4_HDR_PROTO_TCP:
1777 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1778 return NetPktRssIpV6TcpEx;
1779 }
1780 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1781 return NetPktRssIpV6Tcp;
1782 }
1783 break;
1784
1785 case ETH_L4_HDR_PROTO_UDP:
1786 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1787 return NetPktRssIpV6UdpEx;
1788 }
1789 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1790 return NetPktRssIpV6Udp;
1791 }
1792 break;
1793
1794 default:
1795 break;
1796 }
1797
1798 if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1799 return NetPktRssIpV6Ex;
1800 }
1801 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1802 return NetPktRssIpV6;
1803 }
1804 }
1805 return 0xff;
1806 }
1807
1808 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1809 uint32_t hash)
1810 {
1811 struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1812 hdr->hash_value = hash;
1813 hdr->hash_report = report;
1814 }
1815
1816 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1817 size_t size)
1818 {
1819 VirtIONet *n = qemu_get_nic_opaque(nc);
1820 unsigned int index = nc->queue_index, new_index = index;
1821 struct NetRxPkt *pkt = n->rx_pkt;
1822 uint8_t net_hash_type;
1823 uint32_t hash;
1824 bool hasip4, hasip6;
1825 EthL4HdrProto l4hdr_proto;
1826 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1827 VIRTIO_NET_HASH_REPORT_IPv4,
1828 VIRTIO_NET_HASH_REPORT_TCPv4,
1829 VIRTIO_NET_HASH_REPORT_TCPv6,
1830 VIRTIO_NET_HASH_REPORT_IPv6,
1831 VIRTIO_NET_HASH_REPORT_IPv6_EX,
1832 VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1833 VIRTIO_NET_HASH_REPORT_UDPv4,
1834 VIRTIO_NET_HASH_REPORT_UDPv6,
1835 VIRTIO_NET_HASH_REPORT_UDPv6_EX
1836 };
1837 struct iovec iov = {
1838 .iov_base = (void *)buf,
1839 .iov_len = size
1840 };
1841
1842 net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
1843 net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1844 net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
1845 n->rss_data.hash_types);
1846 if (net_hash_type > NetPktRssIpV6UdpEx) {
1847 if (n->rss_data.populate_hash) {
1848 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1849 }
1850 return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1851 }
1852
1853 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1854
1855 if (n->rss_data.populate_hash) {
1856 virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1857 }
1858
1859 if (n->rss_data.redirect) {
1860 new_index = hash & (n->rss_data.indirections_len - 1);
1861 new_index = n->rss_data.indirections_table[new_index];
1862 }
1863
1864 return (index == new_index) ? -1 : new_index;
1865 }
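/*
 * Worked example (illustrative, not in the original file): because
 * virtio_net_handle_rss() only accepts power-of-two indirection table
 * sizes, "hash & (indirections_len - 1)" above is equivalent to
 * "hash % indirections_len". E.g. with indirections_len == 128, hash
 * 0x1234abcd selects table entry 0x4d (77).
 */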
1866
1867 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1868 size_t size, bool no_rss)
1869 {
1870 VirtIONet *n = qemu_get_nic_opaque(nc);
1871 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1872 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1873 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1874 size_t lens[VIRTQUEUE_MAX_SIZE];
1875 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1876 struct virtio_net_hdr_mrg_rxbuf mhdr;
1877 unsigned mhdr_cnt = 0;
1878 size_t offset, i, guest_offset, j;
1879 ssize_t err;
1880
1881 if (!virtio_net_can_receive(nc)) {
1882 return -1;
1883 }
1884
1885 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1886 int index = virtio_net_process_rss(nc, buf, size);
1887 if (index >= 0) {
1888 NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1889 return virtio_net_receive_rcu(nc2, buf, size, true);
1890 }
1891 }
1892
1893 /* hdr_len refers to the header we supply to the guest */
1894 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1895 return 0;
1896 }
1897
1898 if (!receive_filter(n, buf, size))
1899 return size;
1900
1901 offset = i = 0;
1902
1903 while (offset < size) {
1904 VirtQueueElement *elem;
1905 int len, total;
1906 const struct iovec *sg;
1907
1908 total = 0;
1909
1910 if (i == VIRTQUEUE_MAX_SIZE) {
1911 virtio_error(vdev, "virtio-net unexpected long buffer chain");
1912 err = size;
1913 goto err;
1914 }
1915
1916 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1917 if (!elem) {
1918 if (i) {
1919 virtio_error(vdev, "virtio-net unexpected empty queue: "
1920 "i %zd mergeable %d offset %zd, size %zd, "
1921 "guest hdr len %zd, host hdr len %zd "
1922 "guest features 0x%" PRIx64,
1923 i, n->mergeable_rx_bufs, offset, size,
1924 n->guest_hdr_len, n->host_hdr_len,
1925 vdev->guest_features);
1926 }
1927 err = -1;
1928 goto err;
1929 }
1930
1931 if (elem->in_num < 1) {
1932 virtio_error(vdev,
1933 "virtio-net receive queue contains no in buffers");
1934 virtqueue_detach_element(q->rx_vq, elem, 0);
1935 g_free(elem);
1936 err = -1;
1937 goto err;
1938 }
1939
1940 sg = elem->in_sg;
1941 if (i == 0) {
1942 assert(offset == 0);
1943 if (n->mergeable_rx_bufs) {
1944 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1945 sg, elem->in_num,
1946 offsetof(typeof(mhdr), num_buffers),
1947 sizeof(mhdr.num_buffers));
1948 }
1949
1950 receive_header(n, sg, elem->in_num, buf, size);
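/*
 * virtio_net_process_rss() already stored the hash in the buffer's
 * header, so copy the bytes past the basic mrg_rxbuf header to the
 * guest as well.
 */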
1951 if (n->rss_data.populate_hash) {
1952 offset = sizeof(mhdr);
1953 iov_from_buf(sg, elem->in_num, offset,
1954 buf + offset, n->host_hdr_len - sizeof(mhdr));
1955 }
1956 offset = n->host_hdr_len;
1957 total += n->guest_hdr_len;
1958 guest_offset = n->guest_hdr_len;
1959 } else {
1960 guest_offset = 0;
1961 }
1962
1963 /* copy in packet. ugh */
1964 len = iov_from_buf(sg, elem->in_num, guest_offset,
1965 buf + offset, size - offset);
1966 total += len;
1967 offset += len;
1968 /* If buffers can't be merged, at this point we
1969 * must have consumed the complete packet.
1970 * Otherwise, drop it. */
1971 if (!n->mergeable_rx_bufs && offset < size) {
1972 virtqueue_unpop(q->rx_vq, elem, total);
1973 g_free(elem);
1974 err = size;
1975 goto err;
1976 }
1977
1978 elems[i] = elem;
1979 lens[i] = total;
1980 i++;
1981 }
1982
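/* All buffers are filled; patch the number of merged buffers into the
 * header stashed via mhdr_sg when the first element was set up. */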
1983 if (mhdr_cnt) {
1984 virtio_stw_p(vdev, &mhdr.num_buffers, i);
1985 iov_from_buf(mhdr_sg, mhdr_cnt,
1986 0,
1987 &mhdr.num_buffers, sizeof mhdr.num_buffers);
1988 }
1989
1990 for (j = 0; j < i; j++) {
1991 /* signal other side */
1992 virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1993 g_free(elems[j]);
1994 }
1995
1996 virtqueue_flush(q->rx_vq, i);
1997 virtio_notify(vdev, q->rx_vq);
1998
1999 return size;
2000
2001 err:
2002 for (j = 0; j < i; j++) {
2003 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
2004 g_free(elems[j]);
2005 }
2006
2007 return err;
2008 }
2009
2010 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
2011 size_t size)
2012 {
2013 RCU_READ_LOCK_GUARD();
2014
2015 return virtio_net_receive_rcu(nc, buf, size, false);
2016 }
2017
2018 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2019 const uint8_t *buf,
2020 VirtioNetRscUnit *unit)
2021 {
2022 uint16_t ip_hdrlen;
2023 struct ip_header *ip;
2024
2025 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2026 + sizeof(struct eth_header));
2027 unit->ip = (void *)ip;
2028 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2029 unit->ip_plen = &ip->ip_len;
2030 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
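/*
 * The TCP data offset sits in the top 4 bits of th_offset_flags,
 * counted in 32-bit words: (x & 0xF000) >> 12 words times 4 bytes
 * collapses to >> 10; e.g. an offset of 5 (0x5000) yields 20 bytes.
 */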
2031 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2032 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
2033 }
2034
2035 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2036 const uint8_t *buf,
2037 VirtioNetRscUnit *unit)
2038 {
2039 struct ip6_header *ip6;
2040
2041 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2042 + sizeof(struct eth_header));
2043 unit->ip = ip6;
2044 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2045 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2046 + sizeof(struct ip6_header));
2047 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2048
2049 /* There is a difference between the payload length in ipv4 and v6:
2050 the ip header is excluded in ipv6 */
2051 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2052 }
2053
2054 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2055 VirtioNetRscSeg *seg)
2056 {
2057 int ret;
2058 struct virtio_net_hdr_v1 *h;
2059
2060 h = (struct virtio_net_hdr_v1 *)seg->buf;
2061 h->flags = 0;
2062 h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2063
2064 if (seg->is_coalesced) {
2065 h->rsc.segments = seg->packets;
2066 h->rsc.dup_acks = seg->dup_ack;
2067 h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2068 if (chain->proto == ETH_P_IP) {
2069 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2070 } else {
2071 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2072 }
2073 }
2074
2075 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2076 QTAILQ_REMOVE(&chain->buffers, seg, next);
2077 g_free(seg->buf);
2078 g_free(seg);
2079
2080 return ret;
2081 }
2082
2083 static void virtio_net_rsc_purge(void *opq)
2084 {
2085 VirtioNetRscSeg *seg, *rn;
2086 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2087
2088 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2089 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2090 chain->stat.purge_failed++;
2091 continue;
2092 }
2093 }
2094
2095 chain->stat.timer++;
2096 if (!QTAILQ_EMPTY(&chain->buffers)) {
2097 timer_mod(chain->drain_timer,
2098 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2099 }
2100 }
2101
2102 static void virtio_net_rsc_cleanup(VirtIONet *n)
2103 {
2104 VirtioNetRscChain *chain, *rn_chain;
2105 VirtioNetRscSeg *seg, *rn_seg;
2106
2107 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2108 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2109 QTAILQ_REMOVE(&chain->buffers, seg, next);
2110 g_free(seg->buf);
2111 g_free(seg);
2112 }
2113
2114 timer_free(chain->drain_timer);
2115 QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2116 g_free(chain);
2117 }
2118 }
2119
2120 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2121 NetClientState *nc,
2122 const uint8_t *buf, size_t size)
2123 {
2124 uint16_t hdr_len;
2125 VirtioNetRscSeg *seg;
2126
2127 hdr_len = chain->n->guest_hdr_len;
2128 seg = g_new(VirtioNetRscSeg, 1);
2129 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2130 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2131 memcpy(seg->buf, buf, size);
2132 seg->size = size;
2133 seg->packets = 1;
2134 seg->dup_ack = 0;
2135 seg->is_coalesced = 0;
2136 seg->nc = nc;
2137
2138 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2139 chain->stat.cache++;
2140
2141 switch (chain->proto) {
2142 case ETH_P_IP:
2143 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2144 break;
2145 case ETH_P_IPV6:
2146 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2147 break;
2148 default:
2149 g_assert_not_reached();
2150 }
2151 }
2152
2153 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2154 VirtioNetRscSeg *seg,
2155 const uint8_t *buf,
2156 struct tcp_header *n_tcp,
2157 struct tcp_header *o_tcp)
2158 {
2159 uint32_t nack, oack;
2160 uint16_t nwin, owin;
2161
2162 nack = htonl(n_tcp->th_ack);
2163 nwin = htons(n_tcp->th_win);
2164 oack = htonl(o_tcp->th_ack);
2165 owin = htons(o_tcp->th_win);
2166
2167 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2168 chain->stat.ack_out_of_win++;
2169 return RSC_FINAL;
2170 } else if (nack == oack) {
2171 /* duplicated ack or window probe */
2172 if (nwin == owin) {
2173 /* duplicated ack: count it, the WHQL test expects up to 1 dup ack */
2174 chain->stat.dup_ack++;
2175 return RSC_FINAL;
2176 } else {
2177 /* Coalesce window update */
2178 o_tcp->th_win = n_tcp->th_win;
2179 chain->stat.win_update++;
2180 return RSC_COALESCE;
2181 }
2182 } else {
2183 /* pure ack, go to 'C', finalize */
2184 chain->stat.pure_ack++;
2185 return RSC_FINAL;
2186 }
2187 }
2188
2189 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2190 VirtioNetRscSeg *seg,
2191 const uint8_t *buf,
2192 VirtioNetRscUnit *n_unit)
2193 {
2194 void *data;
2195 uint16_t o_ip_len;
2196 uint32_t nseq, oseq;
2197 VirtioNetRscUnit *o_unit;
2198
2199 o_unit = &seg->unit;
2200 o_ip_len = htons(*o_unit->ip_plen);
2201 nseq = htonl(n_unit->tcp->th_seq);
2202 oseq = htonl(o_unit->tcp->th_seq);
2203
2204 /* out of order or retransmitted. */
2205 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2206 chain->stat.data_out_of_win++;
2207 return RSC_FINAL;
2208 }
2209
2210 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
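/*
 * A segment is a clean continuation when nseq equals oseq plus the old
 * payload length; e.g. oseq = 1000 with a 500-byte payload coalesces
 * exactly with nseq = 1500, which falls through to the final branch.
 */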
2211 if (nseq == oseq) {
2212 if ((o_unit->payload == 0) && n_unit->payload) {
2213 /* From no payload to payload: the normal case, not a dup ack etc. */
2214 chain->stat.data_after_pure_ack++;
2215 goto coalesce;
2216 } else {
2217 return virtio_net_rsc_handle_ack(chain, seg, buf,
2218 n_unit->tcp, o_unit->tcp);
2219 }
2220 } else if ((nseq - oseq) != o_unit->payload) {
2221 /* Not a consistent packet, out of order */
2222 chain->stat.data_out_of_order++;
2223 return RSC_FINAL;
2224 } else {
2225 coalesce:
2226 if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2227 chain->stat.over_size++;
2228 return RSC_FINAL;
2229 }
2230
2231 /* The data continues the flow; the payload length field differs between
2232 v4 and v6, so use its value to update and record the new data length */
2233 o_unit->payload += n_unit->payload; /* update new data len */
2234
2235 /* update field in ip header */
2236 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2237
2238 /* Carry the 'PUSH' flag over: the WHQL test guide says 'PUSH' can be
2239 coalesced for a Windows guest, while this may change the behavior for a
2240 Linux guest (only if it uses the RSC feature). */
2241 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2242
2243 o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2244 o_unit->tcp->th_win = n_unit->tcp->th_win;
2245
2246 memmove(seg->buf + seg->size, data, n_unit->payload);
2247 seg->size += n_unit->payload;
2248 seg->packets++;
2249 chain->stat.coalesced++;
2250 return RSC_COALESCE;
2251 }
2252 }
2253
2254 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2255 VirtioNetRscSeg *seg,
2256 const uint8_t *buf, size_t size,
2257 VirtioNetRscUnit *unit)
2258 {
2259 struct ip_header *ip1, *ip2;
2260
2261 ip1 = (struct ip_header *)(unit->ip);
2262 ip2 = (struct ip_header *)(seg->unit.ip);
2263 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2264 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2265 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2266 chain->stat.no_match++;
2267 return RSC_NO_MATCH;
2268 }
2269
2270 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2271 }
2272
2273 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2274 VirtioNetRscSeg *seg,
2275 const uint8_t *buf, size_t size,
2276 VirtioNetRscUnit *unit)
2277 {
2278 struct ip6_header *ip1, *ip2;
2279
2280 ip1 = (struct ip6_header *)(unit->ip);
2281 ip2 = (struct ip6_header *)(seg->unit.ip);
2282 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2283 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2284 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2285 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2286 chain->stat.no_match++;
2287 return RSC_NO_MATCH;
2288 }
2289
2290 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2291 }
2292
2293 /* Packets with 'SYN' should bypass; packets with any other control flag
2294 * should be sent only after a drain, to prevent reordering */
2295 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2296 struct tcp_header *tcp)
2297 {
2298 uint16_t tcp_hdr;
2299 uint16_t tcp_flag;
2300
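/*
 * th_offset_flags packs the data offset into its top 4 bits and the
 * six classic flags (FIN through URG) into its low bits, so
 * VIRTIO_NET_TCP_HDR_LENGTH (0xF000) masks the former and
 * VIRTIO_NET_TCP_FLAG (0x3F) the latter.
 */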
2301 tcp_flag = htons(tcp->th_offset_flags);
2302 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2303 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2304 if (tcp_flag & TH_SYN) {
2305 chain->stat.tcp_syn++;
2306 return RSC_BYPASS;
2307 }
2308
2309 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2310 chain->stat.tcp_ctrl_drain++;
2311 return RSC_FINAL;
2312 }
2313
2314 if (tcp_hdr > sizeof(struct tcp_header)) {
2315 chain->stat.tcp_all_opt++;
2316 return RSC_FINAL;
2317 }
2318
2319 return RSC_CANDIDATE;
2320 }
2321
2322 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2323 NetClientState *nc,
2324 const uint8_t *buf, size_t size,
2325 VirtioNetRscUnit *unit)
2326 {
2327 int ret;
2328 VirtioNetRscSeg *seg, *nseg;
2329
2330 if (QTAILQ_EMPTY(&chain->buffers)) {
2331 chain->stat.empty_cache++;
2332 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2333 timer_mod(chain->drain_timer,
2334 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2335 return size;
2336 }
2337
2338 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2339 if (chain->proto == ETH_P_IP) {
2340 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2341 } else {
2342 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2343 }
2344
2345 if (ret == RSC_FINAL) {
2346 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2347 /* Send failed */
2348 chain->stat.final_failed++;
2349 return 0;
2350 }
2351
2352 /* Send current packet */
2353 return virtio_net_do_receive(nc, buf, size);
2354 } else if (ret == RSC_NO_MATCH) {
2355 continue;
2356 } else {
2357 /* Coalesced: set the flag so the checksum is recalculated for ipv4 */
2358 seg->is_coalesced = 1;
2359 return size;
2360 }
2361 }
2362
2363 chain->stat.no_match_cache++;
2364 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2365 return size;
2366 }
2367
2368 /* Drain a connection's buffered data to avoid out-of-order segments */
2369 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2370 NetClientState *nc,
2371 const uint8_t *buf, size_t size,
2372 uint16_t ip_start, uint16_t ip_size,
2373 uint16_t tcp_port)
2374 {
2375 VirtioNetRscSeg *seg, *nseg;
2376 uint32_t ppair1, ppair2;
2377
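/* th_sport and th_dport are adjacent 16-bit fields, so a single 32-bit
 * load at the tcp_port offset compares the whole port pair at once. */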
2378 ppair1 = *(uint32_t *)(buf + tcp_port);
2379 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2380 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2381 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2382 || (ppair1 != ppair2)) {
2383 continue;
2384 }
2385 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2386 chain->stat.drain_failed++;
2387 }
2388
2389 break;
2390 }
2391
2392 return virtio_net_do_receive(nc, buf, size);
2393 }
2394
2395 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2396 struct ip_header *ip,
2397 const uint8_t *buf, size_t size)
2398 {
2399 uint16_t ip_len;
2400
2401 /* Not an ipv4 packet */
2402 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2403 chain->stat.ip_option++;
2404 return RSC_BYPASS;
2405 }
2406
2407 /* Don't handle packets with ip option */
2408 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2409 chain->stat.ip_option++;
2410 return RSC_BYPASS;
2411 }
2412
2413 if (ip->ip_p != IPPROTO_TCP) {
2414 chain->stat.bypass_not_tcp++;
2415 return RSC_BYPASS;
2416 }
2417
2418 /* Don't handle packets with ip fragment */
2419 if (!(htons(ip->ip_off) & IP_DF)) {
2420 chain->stat.ip_frag++;
2421 return RSC_BYPASS;
2422 }
2423
2424 /* Don't handle packets with ecn flag */
2425 if (IPTOS_ECN(ip->ip_tos)) {
2426 chain->stat.ip_ecn++;
2427 return RSC_BYPASS;
2428 }
2429
2430 ip_len = htons(ip->ip_len);
2431 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2432 || ip_len > (size - chain->n->guest_hdr_len -
2433 sizeof(struct eth_header))) {
2434 chain->stat.ip_hacked++;
2435 return RSC_BYPASS;
2436 }
2437
2438 return RSC_CANDIDATE;
2439 }
2440
2441 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2442 NetClientState *nc,
2443 const uint8_t *buf, size_t size)
2444 {
2445 int32_t ret;
2446 uint16_t hdr_len;
2447 VirtioNetRscUnit unit;
2448
2449 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2450
2451 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2452 + sizeof(struct tcp_header))) {
2453 chain->stat.bypass_not_tcp++;
2454 return virtio_net_do_receive(nc, buf, size);
2455 }
2456
2457 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2458 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2459 != RSC_CANDIDATE) {
2460 return virtio_net_do_receive(nc, buf, size);
2461 }
2462
2463 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
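/* For a drain, the flow is keyed by the IPv4 saddr/daddr pair, which
 * starts 12 bytes into the IP header (hence the "+ 12" below), plus
 * the TCP port pair that follows the IP header. */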
2464 if (ret == RSC_BYPASS) {
2465 return virtio_net_do_receive(nc, buf, size);
2466 } else if (ret == RSC_FINAL) {
2467 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2468 ((hdr_len + sizeof(struct eth_header)) + 12),
2469 VIRTIO_NET_IP4_ADDR_SIZE,
2470 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2471 }
2472
2473 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2474 }
2475
2476 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2477 struct ip6_header *ip6,
2478 const uint8_t *buf, size_t size)
2479 {
2480 uint16_t ip_len;
2481
2482 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2483 != IP_HEADER_VERSION_6) {
2484 return RSC_BYPASS;
2485 }
2486
2487 /* Both options and the protocol are checked by this test */
2488 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2489 chain->stat.bypass_not_tcp++;
2490 return RSC_BYPASS;
2491 }
2492
2493 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2494 if (ip_len < sizeof(struct tcp_header) ||
2495 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2496 - sizeof(struct ip6_header))) {
2497 chain->stat.ip_hacked++;
2498 return RSC_BYPASS;
2499 }
2500
2501 /* Don't handle packets with ecn flag */
2502 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2503 chain->stat.ip_ecn++;
2504 return RSC_BYPASS;
2505 }
2506
2507 return RSC_CANDIDATE;
2508 }
2509
2510 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2511 const uint8_t *buf, size_t size)
2512 {
2513 int32_t ret;
2514 uint16_t hdr_len;
2515 VirtioNetRscChain *chain;
2516 VirtioNetRscUnit unit;
2517
2518 chain = opq;
2519 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2520
2521 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2522 + sizeof(struct tcp_header))) {
2523 return virtio_net_do_receive(nc, buf, size);
2524 }
2525
2526 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2527 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2528 unit.ip, buf, size)) {
2529 return virtio_net_do_receive(nc, buf, size);
2530 }
2531
2532 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
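/* For a drain, the flow is keyed by the IPv6 saddr/daddr pair, which
 * starts 8 bytes into the IPv6 header (hence the "+ 8" below), plus
 * the TCP port pair that follows it. */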
2533 if (ret == RSC_BYPASS) {
2534 return virtio_net_do_receive(nc, buf, size);
2535 } else if (ret == RSC_FINAL) {
2536 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2537 ((hdr_len + sizeof(struct eth_header)) + 8),
2538 VIRTIO_NET_IP6_ADDR_SIZE,
2539 hdr_len + sizeof(struct eth_header)
2540 + sizeof(struct ip6_header));
2541 }
2542
2543 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2544 }
2545
2546 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2547 NetClientState *nc,
2548 uint16_t proto)
2549 {
2550 VirtioNetRscChain *chain;
2551
2552 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2553 return NULL;
2554 }
2555
2556 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2557 if (chain->proto == proto) {
2558 return chain;
2559 }
2560 }
2561
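/* No chain for this protocol yet: create one lazily with a
 * protocol-specific payload limit and GSO type. */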
2562 chain = g_malloc(sizeof(*chain));
2563 chain->n = n;
2564 chain->proto = proto;
2565 if (proto == (uint16_t)ETH_P_IP) {
2566 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2567 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2568 } else {
2569 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2570 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2571 }
2572 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2573 virtio_net_rsc_purge, chain);
2574 memset(&chain->stat, 0, sizeof(chain->stat));
2575
2576 QTAILQ_INIT(&chain->buffers);
2577 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2578
2579 return chain;
2580 }
2581
2582 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2583 const uint8_t *buf,
2584 size_t size)
2585 {
2586 uint16_t proto;
2587 VirtioNetRscChain *chain;
2588 struct eth_header *eth;
2589 VirtIONet *n;
2590
2591 n = qemu_get_nic_opaque(nc);
2592 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2593 return virtio_net_do_receive(nc, buf, size);
2594 }
2595
2596 eth = (struct eth_header *)(buf + n->guest_hdr_len);
2597 proto = htons(eth->h_proto);
2598
2599 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2600 if (chain) {
2601 chain->stat.received++;
2602 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2603 return virtio_net_rsc_receive4(chain, nc, buf, size);
2604 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2605 return virtio_net_rsc_receive6(chain, nc, buf, size);
2606 }
2607 }
2608 return virtio_net_do_receive(nc, buf, size);
2609 }
2610
2611 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2612 size_t size)
2613 {
2614 VirtIONet *n = qemu_get_nic_opaque(nc);
2615 if ((n->rsc4_enabled || n->rsc6_enabled)) {
2616 return virtio_net_rsc_receive(nc, buf, size);
2617 } else {
2618 return virtio_net_do_receive(nc, buf, size);
2619 }
2620 }
2621
2622 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2623
2624 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2625 {
2626 VirtIONet *n = qemu_get_nic_opaque(nc);
2627 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2628 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2629 int ret;
2630
2631 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2632 virtio_notify(vdev, q->tx_vq);
2633
2634 g_free(q->async_tx.elem);
2635 q->async_tx.elem = NULL;
2636
2637 virtio_queue_set_notification(q->tx_vq, 1);
2638 ret = virtio_net_flush_tx(q);
2639 if (ret >= n->tx_burst) {
2640 /*
2641 * the flush has been stopped by tx_burst;
2642 * we will not receive a notification for the
2643 * remaining part, so re-schedule
2644 */
2645 virtio_queue_set_notification(q->tx_vq, 0);
2646 if (q->tx_bh) {
2647 qemu_bh_schedule(q->tx_bh);
2648 } else {
2649 timer_mod(q->tx_timer,
2650 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2651 }
2652 q->tx_waiting = 1;
2653 }
2654 }
2655
2656 /* TX */
2657 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2658 {
2659 VirtIONet *n = q->n;
2660 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2661 VirtQueueElement *elem;
2662 int32_t num_packets = 0;
2663 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2664 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2665 return num_packets;
2666 }
2667
2668 if (q->async_tx.elem) {
2669 virtio_queue_set_notification(q->tx_vq, 0);
2670 return num_packets;
2671 }
2672
2673 for (;;) {
2674 ssize_t ret;
2675 unsigned int out_num;
2676 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2677 struct virtio_net_hdr_mrg_rxbuf mhdr;
2678
2679 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2680 if (!elem) {
2681 break;
2682 }
2683
2684 out_num = elem->out_num;
2685 out_sg = elem->out_sg;
2686 if (out_num < 1) {
2687 virtio_error(vdev, "virtio-net header not in first element");
2688 virtqueue_detach_element(q->tx_vq, elem, 0);
2689 g_free(elem);
2690 return -EINVAL;
2691 }
2692
2693 if (n->has_vnet_hdr) {
2694 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2695 n->guest_hdr_len) {
2696 virtio_error(vdev, "virtio-net header incorrect");
2697 virtqueue_detach_element(q->tx_vq, elem, 0);
2698 g_free(elem);
2699 return -EINVAL;
2700 }
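/*
 * The header is in guest endianness; build a new iovec with the
 * byte-swapped copy in sg2[0] followed by the remaining payload
 * fragments, leaving the guest's own buffers untouched.
 */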
2701 if (n->needs_vnet_hdr_swap) {
2702 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2703 sg2[0].iov_base = &mhdr;
2704 sg2[0].iov_len = n->guest_hdr_len;
2705 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2706 out_sg, out_num,
2707 n->guest_hdr_len, -1);
2708 if (out_num == VIRTQUEUE_MAX_SIZE) {
2709 goto drop;
2710 }
2711 out_num += 1;
2712 out_sg = sg2;
2713 }
2714 }
2715 /*
2716 * If host wants to see the guest header as is, we can
2717 * pass it on unchanged. Otherwise, copy just the parts
2718 * that host is interested in.
2719 */
2720 assert(n->host_hdr_len <= n->guest_hdr_len);
2721 if (n->host_hdr_len != n->guest_hdr_len) {
2722 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2723 out_sg, out_num,
2724 0, n->host_hdr_len);
2725 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2726 out_sg, out_num,
2727 n->guest_hdr_len, -1);
2728 out_num = sg_num;
2729 out_sg = sg;
2730 }
2731
2732 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2733 out_sg, out_num, virtio_net_tx_complete);
2734 if (ret == 0) {
2735 virtio_queue_set_notification(q->tx_vq, 0);
2736 q->async_tx.elem = elem;
2737 return -EBUSY;
2738 }
2739
2740 drop:
2741 virtqueue_push(q->tx_vq, elem, 0);
2742 virtio_notify(vdev, q->tx_vq);
2743 g_free(elem);
2744
2745 if (++num_packets >= n->tx_burst) {
2746 break;
2747 }
2748 }
2749 return num_packets;
2750 }
2751
2752 static void virtio_net_tx_timer(void *opaque);
2753
2754 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2755 {
2756 VirtIONet *n = VIRTIO_NET(vdev);
2757 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2758
2759 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2760 virtio_net_drop_tx_queue_data(vdev, vq);
2761 return;
2762 }
2763
2764 /* This happens when device was stopped but VCPU wasn't. */
2765 if (!vdev->vm_running) {
2766 q->tx_waiting = 1;
2767 return;
2768 }
2769
2770 if (q->tx_waiting) {
2771 /* We already have queued packets, immediately flush */
2772 timer_del(q->tx_timer);
2773 virtio_net_tx_timer(q);
2774 } else {
2775 /* re-arm timer to flush it (and more) on next tick */
2776 timer_mod(q->tx_timer,
2777 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2778 q->tx_waiting = 1;
2779 virtio_queue_set_notification(vq, 0);
2780 }
2781 }
2782
2783 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2784 {
2785 VirtIONet *n = VIRTIO_NET(vdev);
2786 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2787
2788 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2789 virtio_net_drop_tx_queue_data(vdev, vq);
2790 return;
2791 }
2792
2793 if (unlikely(q->tx_waiting)) {
2794 return;
2795 }
2796 q->tx_waiting = 1;
2797 /* This happens when device was stopped but VCPU wasn't. */
2798 if (!vdev->vm_running) {
2799 return;
2800 }
2801 virtio_queue_set_notification(vq, 0);
2802 qemu_bh_schedule(q->tx_bh);
2803 }
2804
2805 static void virtio_net_tx_timer(void *opaque)
2806 {
2807 VirtIONetQueue *q = opaque;
2808 VirtIONet *n = q->n;
2809 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2810 int ret;
2811
2812 /* This happens when the device was stopped but the timer wasn't. */
2813 if (!vdev->vm_running) {
2814 /* Make sure tx waiting is set, so we'll run when restarted. */
2815 assert(q->tx_waiting);
2816 return;
2817 }
2818
2819 q->tx_waiting = 0;
2820
2821 /* Just in case the driver is not ready for more */
2822 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2823 return;
2824 }
2825
2826 ret = virtio_net_flush_tx(q);
2827 if (ret == -EBUSY || ret == -EINVAL) {
2828 return;
2829 }
2830 /*
2831 * If we flush a full burst of packets, assume there are
2832 * more coming and immediately rearm
2833 */
2834 if (ret >= n->tx_burst) {
2835 q->tx_waiting = 1;
2836 timer_mod(q->tx_timer,
2837 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2838 return;
2839 }
2840 /*
2841 * If less than a full burst, re-enable notification and flush
2842 * anything that may have come in while we weren't looking. If
2843 * we find something, assume the guest is still active and rearm
2844 */
2845 virtio_queue_set_notification(q->tx_vq, 1);
2846 ret = virtio_net_flush_tx(q);
2847 if (ret > 0) {
2848 virtio_queue_set_notification(q->tx_vq, 0);
2849 q->tx_waiting = 1;
2850 timer_mod(q->tx_timer,
2851 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2852 }
2853 }
2854
2855 static void virtio_net_tx_bh(void *opaque)
2856 {
2857 VirtIONetQueue *q = opaque;
2858 VirtIONet *n = q->n;
2859 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2860 int32_t ret;
2861
2862 /* This happens when device was stopped but BH wasn't. */
2863 if (!vdev->vm_running) {
2864 /* Make sure tx waiting is set, so we'll run when restarted. */
2865 assert(q->tx_waiting);
2866 return;
2867 }
2868
2869 q->tx_waiting = 0;
2870
2871 /* Just in case the driver is not ready for more */
2872 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2873 return;
2874 }
2875
2876 ret = virtio_net_flush_tx(q);
2877 if (ret == -EBUSY || ret == -EINVAL) {
2878 return; /* Notification re-enable handled by tx_complete or device
2879 * broken */
2880 }
2881
2882 /* If we flush a full burst of packets, assume there are
2883 * more coming and immediately reschedule */
2884 if (ret >= n->tx_burst) {
2885 qemu_bh_schedule(q->tx_bh);
2886 q->tx_waiting = 1;
2887 return;
2888 }
2889
2890 /* If less than a full burst, re-enable notification and flush
2891 * anything that may have come in while we weren't looking. If
2892 * we find something, assume the guest is still active and reschedule */
2893 virtio_queue_set_notification(q->tx_vq, 1);
2894 ret = virtio_net_flush_tx(q);
2895 if (ret == -EINVAL) {
2896 return;
2897 } else if (ret > 0) {
2898 virtio_queue_set_notification(q->tx_vq, 0);
2899 qemu_bh_schedule(q->tx_bh);
2900 q->tx_waiting = 1;
2901 }
2902 }
2903
2904 static void virtio_net_add_queue(VirtIONet *n, int index)
2905 {
2906 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2907
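/* Each queue pair gets an rx virtqueue plus a tx virtqueue; tx is
 * serviced either by a timer (tx=timer) or a bottom half (tx=bh),
 * selected by the tx property below. */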
2908 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2909 virtio_net_handle_rx);
2910
2911 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2912 n->vqs[index].tx_vq =
2913 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2914 virtio_net_handle_tx_timer);
2915 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2916 virtio_net_tx_timer,
2917 &n->vqs[index]);
2918 } else {
2919 n->vqs[index].tx_vq =
2920 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2921 virtio_net_handle_tx_bh);
2922 n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2923 &DEVICE(vdev)->mem_reentrancy_guard);
2924 }
2925
2926 n->vqs[index].tx_waiting = 0;
2927 n->vqs[index].n = n;
2928 }
2929
2930 static void virtio_net_del_queue(VirtIONet *n, int index)
2931 {
2932 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2933 VirtIONetQueue *q = &n->vqs[index];
2934 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2935
2936 qemu_purge_queued_packets(nc);
2937
2938 virtio_del_queue(vdev, index * 2);
2939 if (q->tx_timer) {
2940 timer_free(q->tx_timer);
2941 q->tx_timer = NULL;
2942 } else {
2943 qemu_bh_delete(q->tx_bh);
2944 q->tx_bh = NULL;
2945 }
2946 q->tx_waiting = 0;
2947 virtio_del_queue(vdev, index * 2 + 1);
2948 }
2949
2950 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2951 {
2952 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2953 int old_num_queues = virtio_get_num_queues(vdev);
2954 int new_num_queues = new_max_queue_pairs * 2 + 1;
2955 int i;
2956
2957 assert(old_num_queues >= 3);
2958 assert(old_num_queues % 2 == 1);
2959
2960 if (old_num_queues == new_num_queues) {
2961 return;
2962 }
2963
2964 /*
2965 * We always need to remove and add ctrl vq if
2966 * old_num_queues != new_num_queues. Remove ctrl_vq first,
2967 * and then we only enter one of the following two loops.
2968 */
2969 virtio_del_queue(vdev, old_num_queues - 1);
2970
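/*
 * Data queues come in rx/tx pairs at indices 2*i and 2*i + 1, so both
 * loops below step by two and map an index back to its pair with i / 2.
 */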
2971 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2972 /* new_num_queues < old_num_queues */
2973 virtio_net_del_queue(n, i / 2);
2974 }
2975
2976 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2977 /* new_num_queues > old_num_queues */
2978 virtio_net_add_queue(n, i / 2);
2979 }
2980
2981 /* add ctrl_vq last */
2982 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2983 }
2984
2985 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2986 {
2987 int max = multiqueue ? n->max_queue_pairs : 1;
2988
2989 n->multiqueue = multiqueue;
2990 virtio_net_change_num_queue_pairs(n, max);
2991
2992 virtio_net_set_queue_pairs(n);
2993 }
2994
2995 static int virtio_net_post_load_device(void *opaque, int version_id)
2996 {
2997 VirtIONet *n = opaque;
2998 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2999 int i, link_down;
3000
3001 trace_virtio_net_post_load_device();
3002 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3003 virtio_vdev_has_feature(vdev,
3004 VIRTIO_F_VERSION_1),
3005 virtio_vdev_has_feature(vdev,
3006 VIRTIO_NET_F_HASH_REPORT));
3007
3008 /* MAC_TABLE_ENTRIES may be different from the saved image */
3009 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3010 n->mac_table.in_use = 0;
3011 }
3012
3013 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3014 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3015 }
3016
3017 /*
3018 * curr_guest_offloads will be later overwritten by the
3019 * virtio_set_features_nocheck call done from the virtio_load.
3020 * Here we make sure it is preserved and restored accordingly
3021 * in the virtio_net_post_load_virtio callback.
3022 */
3023 n->saved_guest_offloads = n->curr_guest_offloads;
3024
3025 virtio_net_set_queue_pairs(n);
3026
3027 /* Find the first multicast entry in the saved MAC filter */
3028 for (i = 0; i < n->mac_table.in_use; i++) {
3029 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3030 break;
3031 }
3032 }
3033 n->mac_table.first_multi = i;
3034
3035 /* nc.link_down can't be migrated, so infer link_down according
3036 * to the link status bit in n->status */
3037 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3038 for (i = 0; i < n->max_queue_pairs; i++) {
3039 qemu_get_subqueue(n->nic, i)->link_down = link_down;
3040 }
3041
3042 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3043 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3044 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3045 QEMU_CLOCK_VIRTUAL,
3046 virtio_net_announce_timer, n);
3047 if (n->announce_timer.round) {
3048 timer_mod(n->announce_timer.tm,
3049 qemu_clock_get_ms(n->announce_timer.type));
3050 } else {
3051 qemu_announce_timer_del(&n->announce_timer, false);
3052 }
3053 }
3054
3055 if (n->rss_data.enabled) {
3056 n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
3057 if (!n->rss_data.populate_hash) {
3058 if (!virtio_net_attach_epbf_rss(n)) {
3059 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
3060 warn_report("Can't post-load eBPF RSS for vhost");
3061 } else {
3062 warn_report("Can't post-load eBPF RSS - "
3063 "fallback to software RSS");
3064 n->rss_data.enabled_software_rss = true;
3065 }
3066 }
3067 }
3068
3069 trace_virtio_net_rss_enable(n->rss_data.hash_types,
3070 n->rss_data.indirections_len,
3071 sizeof(n->rss_data.key));
3072 } else {
3073 trace_virtio_net_rss_disable();
3074 }
3075 return 0;
3076 }
3077
3078 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3079 {
3080 VirtIONet *n = VIRTIO_NET(vdev);
3081 /*
3082 * The actual needed state is now in saved_guest_offloads,
3083 * see virtio_net_post_load_device for detail.
3084 * Restore it back and apply the desired offloads.
3085 */
3086 n->curr_guest_offloads = n->saved_guest_offloads;
3087 if (peer_has_vnet_hdr(n)) {
3088 virtio_net_apply_guest_offloads(n);
3089 }
3090
3091 return 0;
3092 }
3093
3094 /* tx_waiting field of a VirtIONetQueue */
3095 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3096 .name = "virtio-net-queue-tx_waiting",
3097 .fields = (VMStateField[]) {
3098 VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3099 VMSTATE_END_OF_LIST()
3100 },
3101 };
3102
3103 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3104 {
3105 return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3106 }
3107
3108 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3109 {
3110 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3111 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3112 }
3113
3114 static bool mac_table_fits(void *opaque, int version_id)
3115 {
3116 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3117 }
3118
3119 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3120 {
3121 return !mac_table_fits(opaque, version_id);
3122 }
3123
3124 /* This temporary type is shared by all the WITH_TMP methods
3125 * although only some fields are used by each.
3126 */
3127 struct VirtIONetMigTmp {
3128 VirtIONet *parent;
3129 VirtIONetQueue *vqs_1;
3130 uint16_t curr_queue_pairs_1;
3131 uint8_t has_ufo;
3132 uint32_t has_vnet_hdr;
3133 };
3134
3135 /* The 2nd and subsequent tx_waiting flags are loaded later than
3136 * the 1st entry in the queue_pairs and only if there's more than one
3137 * entry. We use the tmp mechanism to calculate a temporary
3138 * pointer and count and also validate the count.
3139 */
3140
3141 static int virtio_net_tx_waiting_pre_save(void *opaque)
3142 {
3143 struct VirtIONetMigTmp *tmp = opaque;
3144
3145 tmp->vqs_1 = tmp->parent->vqs + 1;
3146 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3147 if (tmp->parent->curr_queue_pairs == 0) {
3148 tmp->curr_queue_pairs_1 = 0;
3149 }
3150
3151 return 0;
3152 }
3153
3154 static int virtio_net_tx_waiting_pre_load(void *opaque)
3155 {
3156 struct VirtIONetMigTmp *tmp = opaque;
3157
3158 /* Reuse the pointer setup from save */
3159 virtio_net_tx_waiting_pre_save(opaque);
3160
3161 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3162 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3163 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3164
3165 return -EINVAL;
3166 }
3167
3168 return 0; /* all good */
3169 }
3170
3171 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3172 .name = "virtio-net-tx_waiting",
3173 .pre_load = virtio_net_tx_waiting_pre_load,
3174 .pre_save = virtio_net_tx_waiting_pre_save,
3175 .fields = (VMStateField[]) {
3176 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3177 curr_queue_pairs_1,
3178 vmstate_virtio_net_queue_tx_waiting,
3179 struct VirtIONetQueue),
3180 VMSTATE_END_OF_LIST()
3181 },
3182 };
3183
3184 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3185 * flag set we need to check that we have it
3186 */
3187 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3188 {
3189 struct VirtIONetMigTmp *tmp = opaque;
3190
3191 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3192 error_report("virtio-net: saved image requires TUN_F_UFO support");
3193 return -EINVAL;
3194 }
3195
3196 return 0;
3197 }
3198
3199 static int virtio_net_ufo_pre_save(void *opaque)
3200 {
3201 struct VirtIONetMigTmp *tmp = opaque;
3202
3203 tmp->has_ufo = tmp->parent->has_ufo;
3204
3205 return 0;
3206 }
3207
3208 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3209 .name = "virtio-net-ufo",
3210 .post_load = virtio_net_ufo_post_load,
3211 .pre_save = virtio_net_ufo_pre_save,
3212 .fields = (VMStateField[]) {
3213 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3214 VMSTATE_END_OF_LIST()
3215 },
3216 };
3217
3218 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3219 * flag set we need to check that we have it
3220 */
3221 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3222 {
3223 struct VirtIONetMigTmp *tmp = opaque;
3224
3225 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3226 error_report("virtio-net: saved image requires vnet_hdr=on");
3227 return -EINVAL;
3228 }
3229
3230 return 0;
3231 }
3232
3233 static int virtio_net_vnet_pre_save(void *opaque)
3234 {
3235 struct VirtIONetMigTmp *tmp = opaque;
3236
3237 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3238
3239 return 0;
3240 }
3241
3242 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3243 .name = "virtio-net-vnet",
3244 .post_load = virtio_net_vnet_post_load,
3245 .pre_save = virtio_net_vnet_pre_save,
3246 .fields = (VMStateField[]) {
3247 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3248 VMSTATE_END_OF_LIST()
3249 },
3250 };
3251
3252 static bool virtio_net_rss_needed(void *opaque)
3253 {
3254 return VIRTIO_NET(opaque)->rss_data.enabled;
3255 }
3256
3257 static const VMStateDescription vmstate_virtio_net_rss = {
3258 .name = "virtio-net-device/rss",
3259 .version_id = 1,
3260 .minimum_version_id = 1,
3261 .needed = virtio_net_rss_needed,
3262 .fields = (VMStateField[]) {
3263 VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3264 VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3265 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3266 VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3267 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3268 VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3269 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3270 VIRTIO_NET_RSS_MAX_KEY_SIZE),
3271 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3272 rss_data.indirections_len, 0,
3273 vmstate_info_uint16, uint16_t),
3274 VMSTATE_END_OF_LIST()
3275 },
3276 };
3277
3278 static const VMStateDescription vmstate_virtio_net_device = {
3279 .name = "virtio-net-device",
3280 .version_id = VIRTIO_NET_VM_VERSION,
3281 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3282 .post_load = virtio_net_post_load_device,
3283 .fields = (VMStateField[]) {
3284 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3285 VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3286 vmstate_virtio_net_queue_tx_waiting,
3287 VirtIONetQueue),
3288 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3289 VMSTATE_UINT16(status, VirtIONet),
3290 VMSTATE_UINT8(promisc, VirtIONet),
3291 VMSTATE_UINT8(allmulti, VirtIONet),
3292 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3293
3294 /* Guarded pair: If it fits we load it, else we throw it away
3295 * - can happen if the source has a larger MAC table; post-load
3296 * sets flags in this case.
3297 */
3298 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3299 0, mac_table_fits, mac_table.in_use,
3300 ETH_ALEN),
3301 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3302 mac_table.in_use, ETH_ALEN),
3303
3304 /* Note: This is an array of uint32's that's always been saved as a
3305 * buffer; hold onto your endiannesses; it's actually used as a bitmap
3306 * but based on the uint.
3307 */
3308 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3309 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3310 vmstate_virtio_net_has_vnet),
3311 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3312 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3313 VMSTATE_UINT8(alluni, VirtIONet),
3314 VMSTATE_UINT8(nomulti, VirtIONet),
3315 VMSTATE_UINT8(nouni, VirtIONet),
3316 VMSTATE_UINT8(nobcast, VirtIONet),
3317 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3318 vmstate_virtio_net_has_ufo),
3319 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3320 vmstate_info_uint16_equal, uint16_t),
3321 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3322 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3323 vmstate_virtio_net_tx_waiting),
3324 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3325 has_ctrl_guest_offloads),
3326 VMSTATE_END_OF_LIST()
3327 },
3328 .subsections = (const VMStateDescription * []) {
3329 &vmstate_virtio_net_rss,
3330 NULL
3331 }
3332 };
3333
3334 static NetClientInfo net_virtio_info = {
3335 .type = NET_CLIENT_DRIVER_NIC,
3336 .size = sizeof(NICState),
3337 .can_receive = virtio_net_can_receive,
3338 .receive = virtio_net_receive,
3339 .link_status_changed = virtio_net_set_link_status,
3340 .query_rx_filter = virtio_net_query_rxfilter,
3341 .announce = virtio_net_announce,
3342 };
3343
3344 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3345 {
3346 VirtIONet *n = VIRTIO_NET(vdev);
3347 NetClientState *nc;
3348 assert(n->vhost_started);
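/* Without VIRTIO_NET_F_MQ there are exactly three queues: rx (0),
 * tx (1) and, when negotiated, ctrl (2), hence the idx == 2 check. */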
3349 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3350 /* Must guard against invalid features and a bogus queue index
3351 * being set by a malicious guest, or slipping in through a
3352 * buggy migration stream.
3353 */
3354 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3355 qemu_log_mask(LOG_GUEST_ERROR,
3356 "%s: bogus vq index ignored\n", __func__);
3357 return false;
3358 }
3359 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3360 } else {
3361 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3362 }
3363 /*
3364 * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is used
3365 * as the config interrupt's index. If the backend does not support
3366 * it, the function returns false.
3367 */
3368
3369 if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3370 return vhost_net_config_pending(get_vhost_net(nc->peer));
3371 }
3372 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3373 }
3374
3375 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3376 bool mask)
3377 {
3378 VirtIONet *n = VIRTIO_NET(vdev);
3379 NetClientState *nc;
3380 assert(n->vhost_started);
3381 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3382 /* Must guard against invalid features and a bogus queue index
3383 * being set by a malicious guest, or slipping in through a
3384 * buggy migration stream.
3385 */
3386 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3387 qemu_log_mask(LOG_GUEST_ERROR,
3388 "%s: bogus vq index ignored\n", __func__);
3389 return;
3390 }
3391 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3392 } else {
3393 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3394 }
3395 /*
3396 * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is used
3397 * as the config interrupt's index. If the backend does not support
3398 * it, the function simply returns.
3399 */
3400
3401 if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3402 vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3403 return;
3404 }
3405 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3406 }
3407
3408 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3409 {
3410 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3411
3412 n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3413 }
3414
3415 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3416 const char *type)
3417 {
3418 /*
3419 * The name can be NULL; in that case the netclient name will be type.x.
3420 */
3421 assert(type != NULL);
3422
3423 g_free(n->netclient_name);
3424 g_free(n->netclient_type);
3425 n->netclient_name = g_strdup(name);
3426 n->netclient_type = g_strdup(type);
3427 }
3428
3429 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3430 {
3431 HotplugHandler *hotplug_ctrl;
3432 PCIDevice *pci_dev;
3433 Error *err = NULL;
3434
3435 hotplug_ctrl = qdev_get_hotplug_handler(dev);
3436 if (hotplug_ctrl) {
3437 pci_dev = PCI_DEVICE(dev);
3438 pci_dev->partially_hotplugged = true;
3439 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3440 if (err) {
3441 error_report_err(err);
3442 return false;
3443 }
3444 } else {
3445 return false;
3446 }
3447 return true;
3448 }
3449
3450 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3451 Error **errp)
3452 {
3453 Error *err = NULL;
3454 HotplugHandler *hotplug_ctrl;
3455 PCIDevice *pdev = PCI_DEVICE(dev);
3456 BusState *primary_bus;
3457
3458 if (!pdev->partially_hotplugged) {
3459 return true;
3460 }
3461 primary_bus = dev->parent_bus;
3462 if (!primary_bus) {
3463 error_setg(errp, "virtio_net: couldn't find primary bus");
3464 return false;
3465 }
3466 qdev_set_parent_bus(dev, primary_bus, &error_abort);
3467 qatomic_set(&n->failover_primary_hidden, false);
3468 hotplug_ctrl = qdev_get_hotplug_handler(dev);
3469 if (hotplug_ctrl) {
3470 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3471 if (err) {
3472 goto out;
3473 }
3474 hotplug_handler_plug(hotplug_ctrl, dev, &err);
3475 }
3476 pdev->partially_hotplugged = false;
3477
3478 out:
3479 error_propagate(errp, err);
3480 return !err;
3481 }
3482
3483 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3484 {
3485 bool should_be_hidden;
3486 Error *err = NULL;
3487 DeviceState *dev = failover_find_primary_device(n);
3488
3489 if (!dev) {
3490 return;
3491 }
3492
3493 should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3494
3495 if (migration_in_setup(s) && !should_be_hidden) {
3496 if (failover_unplug_primary(n, dev)) {
3497 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3498 qapi_event_send_unplug_primary(dev->id);
3499 qatomic_set(&n->failover_primary_hidden, true);
3500 } else {
3501 warn_report("couldn't unplug primary device");
3502 }
3503 } else if (migration_has_failed(s)) {
3504 /* We already unplugged the device; let's plug it back */
3505 if (!failover_replug_primary(n, dev, &err)) {
3506 if (err) {
3507 error_report_err(err);
3508 }
3509 }
3510 }
3511 }
3512
3513 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3514 {
3515 MigrationState *s = data;
3516 VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3517 virtio_net_handle_migration_primary(n, s);
3518 }
3519
3520 static bool failover_hide_primary_device(DeviceListener *listener,
3521 const QDict *device_opts,
3522 bool from_json,
3523 Error **errp)
3524 {
3525 VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3526 const char *standby_id;
3527
3528 if (!device_opts) {
3529 return false;
3530 }
3531
3532 if (!qdict_haskey(device_opts, "failover_pair_id")) {
3533 return false;
3534 }
3535
3536 if (!qdict_haskey(device_opts, "id")) {
3537 error_setg(errp, "Device with failover_pair_id needs to have id");
3538 return false;
3539 }
3540
3541 standby_id = qdict_get_str(device_opts, "failover_pair_id");
3542 if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3543 return false;
3544 }
3545
3546 /*
3547 * The hide helper can be called several times for a given device.
3548 * Check there is only one primary for a virtio-net device but
3549 * don't duplicate the qdict several times if it's called for the same
3550 * device.
3551 */
3552 if (n->primary_opts) {
3553 const char *old, *new;
3554 /* devices with failover_pair_id always have an id */
3555 old = qdict_get_str(n->primary_opts, "id");
3556 new = qdict_get_str(device_opts, "id");
3557 if (strcmp(old, new) != 0) {
3558 error_setg(errp, "Cannot attach more than one primary device to "
3559 "'%s': '%s' and '%s'", n->netclient_name, old, new);
3560 return false;
3561 }
3562 } else {
3563 n->primary_opts = qdict_clone_shallow(device_opts);
3564 n->primary_opts_from_json = from_json;
3565 }
3566
3567 /* failover_primary_hidden is set during feature negotiation */
3568 return qatomic_read(&n->failover_primary_hidden);
3569 }
3570
3571 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3572 {
3573 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3574 VirtIONet *n = VIRTIO_NET(dev);
3575 NetClientState *nc;
3576 int i;
3577
3578 if (n->net_conf.mtu) {
3579 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3580 }
3581
3582 if (n->net_conf.duplex_str) {
3583 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3584 n->net_conf.duplex = DUPLEX_HALF;
3585 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3586 n->net_conf.duplex = DUPLEX_FULL;
3587 } else {
3588 error_setg(errp, "'duplex' must be 'half' or 'full'");
3589 return;
3590 }
3591 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3592 } else {
3593 n->net_conf.duplex = DUPLEX_UNKNOWN;
3594 }
3595
3596 if (n->net_conf.speed < SPEED_UNKNOWN) {
3597 error_setg(errp, "'speed' must be between 0 and INT_MAX");
3598 return;
3599 }
3600 if (n->net_conf.speed >= 0) {
3601 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3602 }
3603
3604 if (n->failover) {
3605 n->primary_listener.hide_device = failover_hide_primary_device;
3606 qatomic_set(&n->failover_primary_hidden, true);
3607 device_listener_register(&n->primary_listener);
3608 n->migration_state.notify = virtio_net_migration_state_notifier;
3609 add_migration_state_change_notifier(&n->migration_state);
3610 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3611 }
3612
3613 virtio_net_set_config_size(n, n->host_features);
3614 virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3615
3616 /*
3617 * We set a lower limit on RX queue size to what it always was.
3618 * Guests that want a smaller ring can always resize it without
3619 * help from us (using virtio 1 and up).
3620 */
3621 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3622 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3623 !is_power_of_2(n->net_conf.rx_queue_size)) {
3624 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3625 "must be a power of 2 between %d and %d.",
3626 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3627 VIRTQUEUE_MAX_SIZE);
3628 virtio_cleanup(vdev);
3629 return;
3630 }
3631
3632 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3633 n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3634 !is_power_of_2(n->net_conf.tx_queue_size)) {
3635 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3636 "must be a power of 2 between %d and %d",
3637 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3638 virtio_net_max_tx_queue_size(n));
3639 virtio_cleanup(vdev);
3640 return;
3641 }
3642
3643 n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3644
3645 /*
3646 * Figure out the datapath queue pairs since the backend could
3647 * provide a control queue via peers as well.
3648 */
3649 if (n->nic_conf.peers.queues) {
3650 for (i = 0; i < n->max_ncs; i++) {
3651 if (n->nic_conf.peers.ncs[i]->is_datapath) {
3652 ++n->max_queue_pairs;
3653 }
3654 }
3655 }
3656 n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3657
3658 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3659 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3660 "must be a positive integer less than %d.",
3661 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3662 virtio_cleanup(vdev);
3663 return;
3664 }
3665 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3666 n->curr_queue_pairs = 1;
3667 n->tx_timeout = n->net_conf.txtimer;
3668
3669 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3670 && strcmp(n->net_conf.tx, "bh")) {
3671 warn_report("virtio-net: "
3672 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3673 n->net_conf.tx);
3674 error_printf("Defaulting to \"bh\"");
3675 }
3676
3677 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3678 n->net_conf.tx_queue_size);
3679
3680 for (i = 0; i < n->max_queue_pairs; i++) {
3681 virtio_net_add_queue(n, i);
3682 }
3683
3684 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3685 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3686 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3687 n->status = VIRTIO_NET_S_LINK_UP;
3688 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3689 QEMU_CLOCK_VIRTUAL,
3690 virtio_net_announce_timer, n);
3691 n->announce_timer.round = 0;
3692
3693 if (n->netclient_type) {
3694 /*
3695 * Happens when virtio_net_set_netclient_name has been called.
3696 */
3697 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3698 n->netclient_type, n->netclient_name, n);
3699 } else {
3700 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3701 object_get_typename(OBJECT(dev)), dev->id, n);
3702 }
3703
3704 for (i = 0; i < n->max_queue_pairs; i++) {
3705 n->nic->ncs[i].do_not_pad = true;
3706 }
3707
3708 peer_test_vnet_hdr(n);
3709 if (peer_has_vnet_hdr(n)) {
3710 for (i = 0; i < n->max_queue_pairs; i++) {
3711 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3712 }
3713 n->host_hdr_len = sizeof(struct virtio_net_hdr);
3714 } else {
3715 n->host_hdr_len = 0;
3716 }
3717
3718 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3719
3720 n->vqs[0].tx_waiting = 0;
3721 n->tx_burst = n->net_conf.txburst;
3722 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3723 n->promisc = 1; /* for compatibility */
3724
3725 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3726
3727 n->vlans = g_malloc0(MAX_VLAN >> 3);
3728
3729 nc = qemu_get_queue(n->nic);
3730 nc->rxfilter_notify_enabled = 1;
3731
3732 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3733 struct virtio_net_config netcfg = {};
3734 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3735 vhost_net_set_config(get_vhost_net(nc->peer),
3736 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
3737 }
3738 QTAILQ_INIT(&n->rsc_chains);
3739 n->qdev = dev;
3740
3741 net_rx_pkt_init(&n->rx_pkt);
3742
3743 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3744 virtio_net_load_ebpf(n);
3745 }
3746 }
3747
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queue_pairs;

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop the vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        qobject_unref(n->primary_opts);
        device_listener_unregister(&n->primary_listener);
        remove_migration_state_change_notifier(&n->migration_state);
    } else {
        assert(n->primary_opts == NULL);
    }

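    /*
     * Virtqueues are laid out as rx0/tx0, rx1/tx1, ..., so the control
     * queue ends up at index 2 * max_queue_pairs.
     */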
    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    for (i = 0; i < max_queue_pairs; i++) {
        virtio_net_del_queue(n, i);
    }
    /* Also delete the control vq. */
    virtio_del_queue(vdev, max_queue_pairs * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * It can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));

    ebpf_rss_init(&n->ebpf_rss);
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /*
     * At this point the backend must be stopped; otherwise it might
     * keep writing to memory.
     */
    assert(!n->vhost_started);

    return 0;
}

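/*
 * Failover support: with VIRTIO_NET_F_STANDBY this device acts as a standby
 * for a primary (e.g. passthrough) device that has to be unplugged before
 * migration; the helpers below let the migration code wait for that unplug.
 */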
static bool primary_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    DeviceState *primary;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(vdev);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
        return false;
    }
    primary = failover_find_primary_device(n);
    return primary ? primary->pending_deleted_event : false;
}

static bool dev_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    return vdc->primary_unplug_pending(dev);
}

static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    struct vhost_net *net = get_vhost_net(nc->peer);
    return &net->dev;
}

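/*
 * Thin wrapper: the real device state is described by
 * vmstate_virtio_net_device (installed as vdc->vmsd in class_init below);
 * VMSTATE_VIRTIO_DEVICE hooks it into the generic virtio save/load path.
 */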
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};

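/*
 * These properties surface as -device options. An illustrative (not
 * exhaustive, machine-dependent) invocation enabling multiqueue and larger
 * rings might look like:
 *
 *   -device virtio-net-pci,netdev=net0,mq=on,rx_queue_size=1024,tx_queue_size=1024
 *
 * where virtio-net-pci is the PCI proxy wrapping this TYPE_VIRTIO_NET device.
 */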
static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                      VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                      VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->queue_reset = virtio_net_queue_reset;
    vdc->queue_enable = virtio_net_queue_enable;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device;
    vdc->primary_unplug_pending = primary_unplug_pending;
    vdc->get_vhost = virtio_net_get_vhost;
    vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)