[mirror_qemu.git] / hw / net / virtio-net.c
fbe78f4f
AL
1/*
2 * Virtio Network Device
3 *
4 * Copyright IBM, Corp. 2007
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 */
13
9b8bfe21 14#include "qemu/osdep.h"
9711cd0d 15#include "qemu/atomic.h"
1de7afc9 16#include "qemu/iov.h"
68b0a639 17#include "qemu/log.h"
db725815 18#include "qemu/main-loop.h"
0b8fa32f 19#include "qemu/module.h"
0d09e41a 20#include "hw/virtio/virtio.h"
1422e32d 21#include "net/net.h"
7200ac3c 22#include "net/checksum.h"
a8ed73f7 23#include "net/tap.h"
1de7afc9
PB
24#include "qemu/error-report.h"
25#include "qemu/timer.h"
9711cd0d
JF
26#include "qemu/option.h"
27#include "qemu/option_int.h"
28#include "qemu/config-file.h"
29#include "qapi/qmp/qdict.h"
0d09e41a
PB
30#include "hw/virtio/virtio-net.h"
31#include "net/vhost_net.h"
9d8c6a25 32#include "net/announce.h"
17ec5a86 33#include "hw/virtio/virtio-bus.h"
e688df6b 34#include "qapi/error.h"
9af23989 35#include "qapi/qapi-events-net.h"
a27bd6c7 36#include "hw/qdev-properties.h"
9711cd0d
JF
37#include "qapi/qapi-types-migration.h"
38#include "qapi/qapi-events-migration.h"
1399c60d 39#include "hw/virtio/virtio-access.h"
f8d806c9 40#include "migration/misc.h"
9473939e 41#include "standard-headers/linux/ethtool.h"
2f780b6a 42#include "sysemu/sysemu.h"
9d8c6a25 43#include "trace.h"
9711cd0d 44#include "monitor/qdev.h"
edf5ca5d 45#include "hw/pci/pci_device.h"
4474e37a 46#include "net_rx_pkt.h"
108a6481 47#include "hw/virtio/vhost.h"
1b529d90 48#include "sysemu/qtest.h"
fbe78f4f 49
0ce0e8f4 50#define VIRTIO_NET_VM_VERSION 11
b6503ed9 51
f21c0ed9 52#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
9d6271b8 53
1c0fbfa3
MT
54/* previously fixed value */
55#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
9b02e161
WW
56#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
57
441537f1 58/* for now, only allow larger queue sizes; with virtio-1, the guest can downsize */
1c0fbfa3 59#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
9b02e161 60#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
1c0fbfa3 61
2974e916
YB
62#define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */
63
64#define VIRTIO_NET_TCP_FLAG 0x3F
65#define VIRTIO_NET_TCP_HDR_LENGTH 0xF000
66
67/* IPv4 max payload, 16 bits in the header */
68#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
69#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
70
 71/* IPv4 header length field value (in 32-bit words) when no options are present */
72#define VIRTIO_NET_IP4_HEADER_LENGTH 5
73
74#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
75#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
76
 77/* Purge coalesced packets timer interval. This value affects performance
 78 significantly and should be tuned carefully: '300000' (300us) is the
 79 recommended value to pass the WHQL test, while '50000' can gain 2x netperf
 80 throughput with tso/gso/gro 'off'. */
81#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
82
59079029
YB
83#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
84 VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
85 VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
86 VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
87 VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
88 VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
89 VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
90 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
91 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92
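/*
 * Map feature bits to the end offset of struct virtio_net_config that must
 * be exposed when that feature is negotiated; cfg_size_params below uses
 * this table to size the device config space.
 */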
ad6461ad 93static const VirtIOFeature feature_sizes[] = {
127833ee 94 {.flags = 1ULL << VIRTIO_NET_F_MAC,
5d5b33c0 95 .end = endof(struct virtio_net_config, mac)},
127833ee 96 {.flags = 1ULL << VIRTIO_NET_F_STATUS,
5d5b33c0 97 .end = endof(struct virtio_net_config, status)},
127833ee 98 {.flags = 1ULL << VIRTIO_NET_F_MQ,
5d5b33c0 99 .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
127833ee 100 {.flags = 1ULL << VIRTIO_NET_F_MTU,
5d5b33c0 101 .end = endof(struct virtio_net_config, mtu)},
9473939e 102 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
5d5b33c0 103 .end = endof(struct virtio_net_config, duplex)},
e22f0603 104 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
59079029 105 .end = endof(struct virtio_net_config, supported_hash_types)},
14f9b664
JL
106 {}
107};
108
d74c30c8
DT
109static const VirtIOConfigSizeParams cfg_size_params = {
110 .min_size = endof(struct virtio_net_config, mac),
111 .max_size = sizeof(struct virtio_net_config),
112 .feature_sizes = feature_sizes
113};
114
fed699f9 115static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
0c87e93e
JW
116{
117 VirtIONet *n = qemu_get_nic_opaque(nc);
118
fed699f9 119 return &n->vqs[nc->queue_index];
0c87e93e 120}
fed699f9
JW
121
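/*
 * Virtqueues come in RX/TX pairs (rx0, tx0, rx1, tx1, ...), so dividing a
 * virtqueue index by two yields the index of its queue pair.
 */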
122static int vq2q(int queue_index)
123{
124 return queue_index / 2;
125}
126
4fdf69ab
KX
127static void flush_or_purge_queued_packets(NetClientState *nc)
128{
129 if (!nc->peer) {
130 return;
131 }
132
133 qemu_flush_or_purge_queued_packets(nc->peer, true);
134 assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
135}
136
fbe78f4f
AL
137/* TODO
138 * - we could suppress RX interrupt if we were so inclined.
139 */
140
0f03eca6 141static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
fbe78f4f 142{
17a0ca55 143 VirtIONet *n = VIRTIO_NET(vdev);
fbe78f4f 144 struct virtio_net_config netcfg;
c546ecf2 145 NetClientState *nc = qemu_get_queue(n->nic);
fb592882 146 static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
fbe78f4f 147
108a6481
CL
148 int ret = 0;
149 memset(&netcfg, 0 , sizeof(struct virtio_net_config));
1399c60d 150 virtio_stw_p(vdev, &netcfg.status, n->status);
441537f1 151 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
a93e599d 152 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
79674068 153 memcpy(netcfg.mac, n->mac, ETH_ALEN);
9473939e
JB
154 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
155 netcfg.duplex = n->net_conf.duplex;
59079029
YB
156 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
157 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
e22f0603
YB
158 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
159 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
59079029
YB
160 virtio_stl_p(vdev, &netcfg.supported_hash_types,
161 VIRTIO_NET_RSS_SUPPORTED_HASHES);
14f9b664 162 memcpy(config, &netcfg, n->config_size);
108a6481 163
c546ecf2
JW
164 /*
165 * Is this VDPA? No peer means not VDPA: there's no way to
166 * disconnect/reconnect a VDPA peer.
167 */
168 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
108a6481 169 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
c546ecf2 170 n->config_size);
ebc141a6
EP
171 if (ret == -1) {
172 return;
c546ecf2 173 }
ebc141a6
EP
174
175 /*
176 * Some NIC/kernel combinations present 0 as the mac address. As that
177 * is not a legal address, try to proceed with the address from the
178 * QEMU command line in the hope that the address has been configured
179 * correctly elsewhere - just not reported by the device.
180 */
181 if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
182 info_report("Zero hardware mac address detected. Ignoring.");
183 memcpy(netcfg.mac, n->mac, ETH_ALEN);
184 }
185
4f93aafc
EP
186 netcfg.status |= virtio_tswap16(vdev,
187 n->status & VIRTIO_NET_S_ANNOUNCE);
ebc141a6 188 memcpy(config, &netcfg, n->config_size);
108a6481 189 }
fbe78f4f
AL
190}
191
0f03eca6
AL
192static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
193{
17a0ca55 194 VirtIONet *n = VIRTIO_NET(vdev);
14f9b664 195 struct virtio_net_config netcfg = {};
c546ecf2 196 NetClientState *nc = qemu_get_queue(n->nic);
0f03eca6 197
14f9b664 198 memcpy(&netcfg, config, n->config_size);
0f03eca6 199
95129d6f
CH
200 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
201 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
c1943a3f 202 memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
79674068 203 memcpy(n->mac, netcfg.mac, ETH_ALEN);
b356f76d 204 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
0f03eca6 205 }
108a6481 206
c546ecf2
JW
207 /*
208 * Is this VDPA? No peer means not VDPA: there's no way to
209 * disconnect/reconnect a VDPA peer.
210 */
211 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
212 vhost_net_set_config(get_vhost_net(nc->peer),
213 (uint8_t *)&netcfg, 0, n->config_size,
f8ed3648 214 VHOST_SET_CONFIG_TYPE_FRONTEND);
108a6481 215 }
0f03eca6
AL
216}
217
783e7706
MT
218static bool virtio_net_started(VirtIONet *n, uint8_t status)
219{
17a0ca55 220 VirtIODevice *vdev = VIRTIO_DEVICE(n);
783e7706 221 return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
17a0ca55 222 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
783e7706
MT
223}
224
b2c929f0
DDAG
225static void virtio_net_announce_notify(VirtIONet *net)
226{
227 VirtIODevice *vdev = VIRTIO_DEVICE(net);
228 trace_virtio_net_announce_notify();
229
230 net->status |= VIRTIO_NET_S_ANNOUNCE;
231 virtio_notify_config(vdev);
232}
233
f57fcf70
JW
234static void virtio_net_announce_timer(void *opaque)
235{
236 VirtIONet *n = opaque;
9d8c6a25 237 trace_virtio_net_announce_timer(n->announce_timer.round);
f57fcf70 238
9d8c6a25 239 n->announce_timer.round--;
b2c929f0
DDAG
240 virtio_net_announce_notify(n);
241}
242
243static void virtio_net_announce(NetClientState *nc)
244{
245 VirtIONet *n = qemu_get_nic_opaque(nc);
246 VirtIODevice *vdev = VIRTIO_DEVICE(n);
247
248 /*
 249 * Make sure the virtio migration announcement timer isn't running.
 250 * If it is, let it trigger the announcement so that we do not cause
251 * confusion.
252 */
253 if (n->announce_timer.round) {
254 return;
255 }
256
257 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
258 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
259 virtio_net_announce_notify(n);
260 }
f57fcf70
JW
261}
262
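/*
 * Start or stop vhost according to the device status: vhost runs only while
 * the guest driver is ready and the link is up, and any packets still queued
 * in QEMU are purged first so the rings are not touched while vhost owns them.
 */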
783e7706 263static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
afbaa7b4 264{
17a0ca55 265 VirtIODevice *vdev = VIRTIO_DEVICE(n);
b356f76d 266 NetClientState *nc = qemu_get_queue(n->nic);
441537f1 267 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
aa858194
SWL
268 int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
269 n->max_ncs - n->max_queue_pairs : 0;
b356f76d 270
ed8b4afe 271 if (!get_vhost_net(nc->peer)) {
afbaa7b4
MT
272 return;
273 }
fed699f9 274
8c1ac475
RK
275 if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
276 !!n->vhost_started) {
afbaa7b4
MT
277 return;
278 }
279 if (!n->vhost_started) {
086abc1c
MT
280 int r, i;
281
1bfa316c
GK
282 if (n->needs_vnet_hdr_swap) {
283 error_report("backend does not support %s vnet headers; "
284 "falling back on userspace virtio",
285 virtio_is_big_endian(vdev) ? "BE" : "LE");
286 return;
287 }
288
086abc1c
MT
289 /* Any packets outstanding? Purge them to avoid touching rings
290 * when vhost is running.
291 */
441537f1 292 for (i = 0; i < queue_pairs; i++) {
086abc1c
MT
293 NetClientState *qnc = qemu_get_subqueue(n->nic, i);
294
295 /* Purge both directions: TX and RX. */
296 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
297 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
298 }
299
a93e599d
MC
300 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
301 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
302 if (r < 0) {
 303 error_report("%u-byte MTU not supported by the backend",
304 n->net_conf.mtu);
305
306 return;
307 }
308 }
309
1830b80f 310 n->vhost_started = 1;
22288fe5 311 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
afbaa7b4 312 if (r < 0) {
e7b43f7e
SH
313 error_report("unable to start vhost net: %d: "
314 "falling back on userspace virtio", -r);
1830b80f 315 n->vhost_started = 0;
afbaa7b4
MT
316 }
317 } else {
22288fe5 318 vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
afbaa7b4
MT
319 n->vhost_started = 0;
320 }
321}
322
1bfa316c
GK
323static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
324 NetClientState *peer,
325 bool enable)
326{
327 if (virtio_is_big_endian(vdev)) {
328 return qemu_set_vnet_be(peer, enable);
329 } else {
330 return qemu_set_vnet_le(peer, enable);
331 }
332}
333
334static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
441537f1 335 int queue_pairs, bool enable)
1bfa316c
GK
336{
337 int i;
338
441537f1 339 for (i = 0; i < queue_pairs; i++) {
1bfa316c
GK
340 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
341 enable) {
342 while (--i >= 0) {
343 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
344 }
345
346 return true;
347 }
348 }
349
350 return false;
351}
352
353static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
354{
355 VirtIODevice *vdev = VIRTIO_DEVICE(n);
441537f1 356 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
1bfa316c
GK
357
358 if (virtio_net_started(n, status)) {
359 /* Before using the device, we tell the network backend about the
360 * endianness to use when parsing vnet headers. If the backend
 361 * can't do it, we fall back to fixing the headers in the core
362 * virtio-net code.
363 */
364 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
441537f1 365 queue_pairs, true);
1bfa316c
GK
366 } else if (virtio_net_started(n, vdev->status)) {
367 /* After using the device, we need to reset the network backend to
368 * the default (guest native endianness), otherwise the guest may
369 * lose network connectivity if it is rebooted into a different
370 * endianness.
371 */
441537f1 372 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
1bfa316c
GK
373 }
374}
375
283e2c2a
YB
376static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
377{
378 unsigned int dropped = virtqueue_drop_all(vq);
379 if (dropped) {
380 virtio_notify(vdev, vq);
381 }
382}
383
783e7706
MT
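/*
 * Propagate a status change to every queue pair: flush queues that have just
 * become active and arm or cancel the TX timer/bottom half depending on
 * whether each queue is still running.
 */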
384static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
385{
17a0ca55 386 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9
JW
387 VirtIONetQueue *q;
388 int i;
389 uint8_t queue_status;
783e7706 390
1bfa316c 391 virtio_net_vnet_endian_status(n, status);
783e7706
MT
392 virtio_net_vhost_status(n, status);
393
441537f1 394 for (i = 0; i < n->max_queue_pairs; i++) {
38705bb5
FZ
395 NetClientState *ncs = qemu_get_subqueue(n->nic, i);
396 bool queue_started;
fed699f9 397 q = &n->vqs[i];
783e7706 398
441537f1 399 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
fed699f9 400 queue_status = 0;
783e7706 401 } else {
fed699f9 402 queue_status = status;
783e7706 403 }
38705bb5
FZ
404 queue_started =
405 virtio_net_started(n, queue_status) && !n->vhost_started;
406
407 if (queue_started) {
408 qemu_flush_queued_packets(ncs);
409 }
fed699f9
JW
410
411 if (!q->tx_waiting) {
412 continue;
413 }
414
38705bb5 415 if (queue_started) {
fed699f9 416 if (q->tx_timer) {
bc72ad67
AB
417 timer_mod(q->tx_timer,
418 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
fed699f9
JW
419 } else {
420 qemu_bh_schedule(q->tx_bh);
421 }
783e7706 422 } else {
fed699f9 423 if (q->tx_timer) {
bc72ad67 424 timer_del(q->tx_timer);
fed699f9
JW
425 } else {
426 qemu_bh_cancel(q->tx_bh);
427 }
283e2c2a 428 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
70e53e6e
JW
429 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
430 vdev->vm_running) {
283e2c2a
YB
 431 /* if tx is waiting, we likely have some packets in the tx queue
 432 * and notification disabled */
433 q->tx_waiting = 0;
434 virtio_queue_set_notification(q->tx_vq, 1);
435 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
436 }
783e7706
MT
437 }
438 }
439}
440
4e68f7a0 441static void virtio_net_set_link_status(NetClientState *nc)
554c97dd 442{
cc1f0f45 443 VirtIONet *n = qemu_get_nic_opaque(nc);
17a0ca55 444 VirtIODevice *vdev = VIRTIO_DEVICE(n);
554c97dd
AL
445 uint16_t old_status = n->status;
446
eb6b6c12 447 if (nc->link_down)
554c97dd
AL
448 n->status &= ~VIRTIO_NET_S_LINK_UP;
449 else
450 n->status |= VIRTIO_NET_S_LINK_UP;
451
452 if (n->status != old_status)
17a0ca55 453 virtio_notify_config(vdev);
afbaa7b4 454
17a0ca55 455 virtio_net_set_status(vdev, vdev->status);
554c97dd
AL
456}
457
b1be4280
AK
458static void rxfilter_notify(NetClientState *nc)
459{
b1be4280
AK
460 VirtIONet *n = qemu_get_nic_opaque(nc);
461
462 if (nc->rxfilter_notify_enabled) {
ddfb0baa 463 char *path = object_get_canonical_path(OBJECT(n->qdev));
7480874a 464 qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
96e35046 465 g_free(path);
b1be4280
AK
466
 467 /* disable event notification to avoid event flooding */
468 nc->rxfilter_notify_enabled = 0;
469 }
470}
471
f7bc8ef8
AK
472static intList *get_vlan_table(VirtIONet *n)
473{
54aa3de7 474 intList *list;
f7bc8ef8
AK
475 int i, j;
476
477 list = NULL;
478 for (i = 0; i < MAX_VLAN >> 5; i++) {
479 for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
480 if (n->vlans[i] & (1U << j)) {
54aa3de7 481 QAPI_LIST_PREPEND(list, (i << 5) + j);
f7bc8ef8
AK
482 }
483 }
484 }
485
486 return list;
487}
488
b1be4280
AK
489static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
490{
491 VirtIONet *n = qemu_get_nic_opaque(nc);
f7bc8ef8 492 VirtIODevice *vdev = VIRTIO_DEVICE(n);
b1be4280 493 RxFilterInfo *info;
54aa3de7 494 strList *str_list;
f7bc8ef8 495 int i;
b1be4280
AK
496
497 info = g_malloc0(sizeof(*info));
498 info->name = g_strdup(nc->name);
499 info->promiscuous = n->promisc;
500
501 if (n->nouni) {
502 info->unicast = RX_STATE_NONE;
503 } else if (n->alluni) {
504 info->unicast = RX_STATE_ALL;
505 } else {
506 info->unicast = RX_STATE_NORMAL;
507 }
508
509 if (n->nomulti) {
510 info->multicast = RX_STATE_NONE;
511 } else if (n->allmulti) {
512 info->multicast = RX_STATE_ALL;
513 } else {
514 info->multicast = RX_STATE_NORMAL;
515 }
516
517 info->broadcast_allowed = n->nobcast;
518 info->multicast_overflow = n->mac_table.multi_overflow;
519 info->unicast_overflow = n->mac_table.uni_overflow;
520
b0575ba4 521 info->main_mac = qemu_mac_strdup_printf(n->mac);
b1be4280
AK
522
523 str_list = NULL;
524 for (i = 0; i < n->mac_table.first_multi; i++) {
54aa3de7
EB
525 QAPI_LIST_PREPEND(str_list,
526 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
b1be4280
AK
527 }
528 info->unicast_table = str_list;
529
530 str_list = NULL;
531 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
54aa3de7
EB
532 QAPI_LIST_PREPEND(str_list,
533 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
b1be4280
AK
534 }
535 info->multicast_table = str_list;
f7bc8ef8 536 info->vlan_table = get_vlan_table(n);
b1be4280 537
95129d6f 538 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
f7bc8ef8
AK
539 info->vlan = RX_STATE_ALL;
540 } else if (!info->vlan_table) {
541 info->vlan = RX_STATE_NONE;
542 } else {
543 info->vlan = RX_STATE_NORMAL;
b1be4280 544 }
b1be4280
AK
545
546 /* enable event notification after query */
547 nc->rxfilter_notify_enabled = 1;
548
549 return info;
550}
551
7dc6be52
XZ
552static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
553{
554 VirtIONet *n = VIRTIO_NET(vdev);
f47af0af
XZ
555 NetClientState *nc;
556
557 /* validate queue_index and skip for cvq */
558 if (queue_index >= n->max_queue_pairs * 2) {
559 return;
560 }
561
562 nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
7dc6be52
XZ
563
564 if (!nc->peer) {
565 return;
566 }
567
568 if (get_vhost_net(nc->peer) &&
569 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
570 vhost_net_virtqueue_reset(vdev, nc, queue_index);
571 }
572
573 flush_or_purge_queued_packets(nc);
574}
575
7f863302
KX
576static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
577{
578 VirtIONet *n = VIRTIO_NET(vdev);
f47af0af 579 NetClientState *nc;
7f863302
KX
580 int r;
581
f47af0af
XZ
582 /* validate queue_index and skip for cvq */
583 if (queue_index >= n->max_queue_pairs * 2) {
584 return;
585 }
586
587 nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
588
7f863302
KX
589 if (!nc->peer || !vdev->vhost_started) {
590 return;
591 }
592
593 if (get_vhost_net(nc->peer) &&
594 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
595 r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
596 if (r < 0) {
597 error_report("unable to restart vhost net virtqueue: %d, "
598 "when resetting the queue", queue_index);
599 }
600 }
601}
602
002437cd
AL
603static void virtio_net_reset(VirtIODevice *vdev)
604{
17a0ca55 605 VirtIONet *n = VIRTIO_NET(vdev);
94b52958 606 int i;
002437cd
AL
607
608 /* Reset back to compatibility mode */
609 n->promisc = 1;
610 n->allmulti = 0;
015cb166
AW
611 n->alluni = 0;
612 n->nomulti = 0;
613 n->nouni = 0;
614 n->nobcast = 0;
fed699f9 615 /* multiqueue is disabled by default */
441537f1 616 n->curr_queue_pairs = 1;
9d8c6a25
DDAG
617 timer_del(n->announce_timer.tm);
618 n->announce_timer.round = 0;
f57fcf70 619 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
b6503ed9 620
f21c0ed9 621 /* Flush any MAC and VLAN filter table state */
b6503ed9 622 n->mac_table.in_use = 0;
2d9aba39 623 n->mac_table.first_multi = 0;
8fd2a2f1
AW
624 n->mac_table.multi_overflow = 0;
625 n->mac_table.uni_overflow = 0;
b6503ed9 626 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
41dc8a67 627 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
702d66a8 628 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
f21c0ed9 629 memset(n->vlans, 0, MAX_VLAN >> 3);
94b52958
GK
630
631 /* Flush any async TX */
441537f1 632 for (i = 0; i < n->max_queue_pairs; i++) {
4fdf69ab 633 flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
94b52958 634 }
002437cd
AL
635}
636
6e371ab8 637static void peer_test_vnet_hdr(VirtIONet *n)
3a330134 638{
b356f76d
JW
639 NetClientState *nc = qemu_get_queue(n->nic);
640 if (!nc->peer) {
6e371ab8 641 return;
b356f76d 642 }
3a330134 643
d6085e3a 644 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
6e371ab8 645}
3a330134 646
6e371ab8
MT
647static int peer_has_vnet_hdr(VirtIONet *n)
648{
3a330134
MM
649 return n->has_vnet_hdr;
650}
651
0ce0e8f4
MM
652static int peer_has_ufo(VirtIONet *n)
653{
654 if (!peer_has_vnet_hdr(n))
655 return 0;
656
d6085e3a 657 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
0ce0e8f4
MM
658
659 return n->has_ufo;
660}
661
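/*
 * Recompute the vnet header length the guest expects: virtio 1.0 always uses
 * the mergeable-rxbuf layout (extended with the hash field when hash
 * reporting is negotiated), while legacy devices use it only when MRG_RXBUF
 * was negotiated.
 */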
bb9d17f8 662static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
e22f0603 663 int version_1, int hash_report)
ff3a8066 664{
fed699f9
JW
665 int i;
666 NetClientState *nc;
667
ff3a8066
MT
668 n->mergeable_rx_bufs = mergeable_rx_bufs;
669
bb9d17f8 670 if (version_1) {
e22f0603
YB
671 n->guest_hdr_len = hash_report ?
672 sizeof(struct virtio_net_hdr_v1_hash) :
673 sizeof(struct virtio_net_hdr_mrg_rxbuf);
674 n->rss_data.populate_hash = !!hash_report;
bb9d17f8
CH
675 } else {
676 n->guest_hdr_len = n->mergeable_rx_bufs ?
677 sizeof(struct virtio_net_hdr_mrg_rxbuf) :
678 sizeof(struct virtio_net_hdr);
679 }
ff3a8066 680
441537f1 681 for (i = 0; i < n->max_queue_pairs; i++) {
fed699f9
JW
682 nc = qemu_get_subqueue(n->nic, i);
683
684 if (peer_has_vnet_hdr(n) &&
d6085e3a
SH
685 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
686 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
fed699f9
JW
687 n->host_hdr_len = n->guest_hdr_len;
688 }
ff3a8066
MT
689 }
690}
691
2eef278b
MT
692static int virtio_net_max_tx_queue_size(VirtIONet *n)
693{
694 NetClientState *peer = n->nic_conf.peers.ncs[0];
695
696 /*
0ea5778f
EP
697 * Backends other than vhost-user or vhost-vdpa don't support max queue
698 * size.
2eef278b
MT
699 */
700 if (!peer) {
701 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
702 }
703
0ea5778f
EP
704 switch(peer->info->type) {
705 case NET_CLIENT_DRIVER_VHOST_USER:
706 case NET_CLIENT_DRIVER_VHOST_VDPA:
707 return VIRTQUEUE_MAX_SIZE;
708 default:
2eef278b 709 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
0ea5778f 710 };
2eef278b
MT
711}
712
fed699f9
JW
713static int peer_attach(VirtIONet *n, int index)
714{
715 NetClientState *nc = qemu_get_subqueue(n->nic, index);
716
717 if (!nc->peer) {
718 return 0;
719 }
720
f394b2e2 721 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
7263a0ad
CO
722 vhost_set_vring_enable(nc->peer, 1);
723 }
724
f394b2e2 725 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
fed699f9
JW
726 return 0;
727 }
728
441537f1 729 if (n->max_queue_pairs == 1) {
1074b879
JW
730 return 0;
731 }
732
fed699f9
JW
733 return tap_enable(nc->peer);
734}
735
736static int peer_detach(VirtIONet *n, int index)
737{
738 NetClientState *nc = qemu_get_subqueue(n->nic, index);
739
740 if (!nc->peer) {
741 return 0;
742 }
743
f394b2e2 744 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
7263a0ad
CO
745 vhost_set_vring_enable(nc->peer, 0);
746 }
747
f394b2e2 748 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
fed699f9
JW
749 return 0;
750 }
751
752 return tap_disable(nc->peer);
753}
754
441537f1 755static void virtio_net_set_queue_pairs(VirtIONet *n)
fed699f9
JW
756{
757 int i;
ddfa83ea 758 int r;
fed699f9 759
68b5f314
YB
760 if (n->nic->peer_deleted) {
761 return;
762 }
763
441537f1
JW
764 for (i = 0; i < n->max_queue_pairs; i++) {
765 if (i < n->curr_queue_pairs) {
ddfa83ea
JS
766 r = peer_attach(n, i);
767 assert(!r);
fed699f9 768 } else {
ddfa83ea
JS
769 r = peer_detach(n, i);
770 assert(!r);
fed699f9
JW
771 }
772 }
773}
774
ec57db16 775static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
fed699f9 776
9d5b731d
JW
777static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
778 Error **errp)
fbe78f4f 779{
17a0ca55 780 VirtIONet *n = VIRTIO_NET(vdev);
b356f76d 781 NetClientState *nc = qemu_get_queue(n->nic);
fbe78f4f 782
da3e8a23
SZ
 783 /* First, sync all possible supported virtio-net features */
784 features |= n->host_features;
785
0cd09c3a 786 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
c9f79a3f 787
6e371ab8 788 if (!peer_has_vnet_hdr(n)) {
0cd09c3a
CH
789 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
790 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
791 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
792 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
8172539d 793
0cd09c3a
CH
794 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
795 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
796 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
797 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
e22f0603
YB
798
799 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
8172539d 800 }
3a330134 801
8172539d 802 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
0cd09c3a
CH
803 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
804 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
3a330134
MM
805 }
806
ed8b4afe 807 if (!get_vhost_net(nc->peer)) {
9bc6304c
MT
808 return features;
809 }
2974e916 810
0145c393
AM
811 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
812 virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
813 }
75ebec11
MC
814 features = vhost_net_get_features(get_vhost_net(nc->peer), features);
815 vdev->backend_features = features;
816
817 if (n->mtu_bypass_backend &&
818 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
819 features |= (1ULL << VIRTIO_NET_F_MTU);
820 }
821
cd69d47c
EP
822 /*
 823 * Since GUEST_ANNOUNCE is emulated, the feature bit could be offered even
 824 * when the control virtqueue is not available. This happens in the vDPA case.
825 *
826 * Make sure the feature set is not incoherent, as the driver could refuse
827 * to start.
828 *
829 * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
 830 * helping the guest to notify the new location with vDPA devices that do not
831 * support it.
832 */
833 if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
834 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
835 }
836
75ebec11 837 return features;
fbe78f4f
AL
838}
839
019a3edb 840static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
8eca6b1b 841{
019a3edb 842 uint64_t features = 0;
8eca6b1b
AL
843
844 /* Linux kernel 2.6.25. It understood MAC (as everyone must),
845 * but also these: */
0cd09c3a
CH
846 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
847 virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
848 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
849 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
850 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
8eca6b1b 851
8172539d 852 return features;
8eca6b1b
AL
853}
854
644c9858
DF
855static void virtio_net_apply_guest_offloads(VirtIONet *n)
856{
ad37bb3b 857 qemu_set_offload(qemu_get_queue(n->nic)->peer,
644c9858
DF
858 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
859 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
860 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
861 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
862 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
863}
864
865static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
866{
867 static const uint64_t guest_offloads_mask =
868 (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
869 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
870 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
871 (1ULL << VIRTIO_NET_F_GUEST_ECN) |
872 (1ULL << VIRTIO_NET_F_GUEST_UFO);
873
874 return guest_offloads_mask & features;
875}
876
0b545b1e 877uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
644c9858
DF
878{
879 VirtIODevice *vdev = VIRTIO_DEVICE(n);
880 return virtio_net_guest_offloads_by_features(vdev->guest_features);
881}
882
f5e1847b
JQ
883typedef struct {
884 VirtIONet *n;
12b2fad7
KW
885 DeviceState *dev;
886} FailoverDevice;
f5e1847b
JQ
887
888/**
12b2fad7 889 * Set the failover primary device
f5e1847b
JQ
890 *
 891 * @dev: device being visited by the bus walk
 892 * @opaque: FailoverDevice used to return the matching device
 893 * Returns: 1 when the primary device has been found, 0 to keep walking
894 */
12b2fad7 895static int failover_set_primary(DeviceState *dev, void *opaque)
f5e1847b 896{
12b2fad7
KW
897 FailoverDevice *fdev = opaque;
898 PCIDevice *pci_dev = (PCIDevice *)
899 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
f5e1847b 900
12b2fad7
KW
901 if (!pci_dev) {
902 return 0;
903 }
904
905 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
906 fdev->dev = dev;
f5e1847b
JQ
907 return 1;
908 }
909
910 return 0;
911}
912
21e8709b
JQ
913/**
914 * Find the primary device for this failover virtio-net
915 *
 916 * @n: VirtIONet device
 917 * Returns: the primary DeviceState, or NULL if it is not present
918 */
919static DeviceState *failover_find_primary_device(VirtIONet *n)
920{
12b2fad7
KW
921 FailoverDevice fdev = {
922 .n = n,
923 };
21e8709b 924
12b2fad7
KW
925 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
926 NULL, NULL, &fdev);
927 return fdev.dev;
21e8709b
JQ
928}
929
9711cd0d
JF
930static void failover_add_primary(VirtIONet *n, Error **errp)
931{
932 Error *err = NULL;
21e8709b 933 DeviceState *dev = failover_find_primary_device(n);
9711cd0d 934
21e8709b 935 if (dev) {
117378bf
JF
936 return;
937 }
938
259a10db 939 if (!n->primary_opts) {
97ca9c59
LV
940 error_setg(errp, "Primary device not found");
941 error_append_hint(errp, "Virtio-net failover will not work. Make "
942 "sure primary device has parameter"
943 " failover_pair_id=%s\n", n->netclient_name);
3abad4a2
JQ
944 return;
945 }
259a10db 946
f3558b1b
KW
947 dev = qdev_device_add_from_qdict(n->primary_opts,
948 n->primary_opts_from_json,
949 &err);
97ca9c59 950 if (err) {
f3558b1b 951 qobject_unref(n->primary_opts);
259a10db 952 n->primary_opts = NULL;
9711cd0d 953 } else {
97ca9c59 954 object_unref(OBJECT(dev));
1c775d65 955 }
2155ceaf 956 error_propagate(errp, err);
9711cd0d
JF
957}
958
d5aaa1b0 959static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
fbe78f4f 960{
17a0ca55 961 VirtIONet *n = VIRTIO_NET(vdev);
9711cd0d 962 Error *err = NULL;
fed699f9
JW
963 int i;
964
75ebec11
MC
965 if (n->mtu_bypass_backend &&
966 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
967 features &= ~(1ULL << VIRTIO_NET_F_MTU);
968 }
969
ef546f12 970 virtio_net_set_multiqueue(n,
59079029 971 virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
95129d6f 972 virtio_has_feature(features, VIRTIO_NET_F_MQ));
fbe78f4f 973
ef546f12 974 virtio_net_set_mrg_rx_bufs(n,
95129d6f
CH
975 virtio_has_feature(features,
976 VIRTIO_NET_F_MRG_RXBUF),
977 virtio_has_feature(features,
e22f0603
YB
978 VIRTIO_F_VERSION_1),
979 virtio_has_feature(features,
980 VIRTIO_NET_F_HASH_REPORT));
f5436dd9 981
2974e916
YB
982 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
983 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
984 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
985 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
e22f0603 986 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
2974e916 987
f5436dd9 988 if (n->has_vnet_hdr) {
644c9858
DF
989 n->curr_guest_offloads =
990 virtio_net_guest_offloads_by_features(features);
991 virtio_net_apply_guest_offloads(n);
f5436dd9 992 }
fed699f9 993
441537f1 994 for (i = 0; i < n->max_queue_pairs; i++) {
fed699f9
JW
995 NetClientState *nc = qemu_get_subqueue(n->nic, i);
996
ed8b4afe 997 if (!get_vhost_net(nc->peer)) {
fed699f9
JW
998 continue;
999 }
ed8b4afe 1000 vhost_net_ack_features(get_vhost_net(nc->peer), features);
c9bdc449
HH
1001
1002 /*
1003 * keep acked_features in NetVhostUserState up-to-date so it
 1004 * can't miss any features configured by the guest virtio driver.
1005 */
1006 vhost_net_save_acked_features(nc->peer);
dc14a397 1007 }
0b1eaa88 1008
95129d6f 1009 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
0b1eaa88
SF
1010 memset(n->vlans, 0, MAX_VLAN >> 3);
1011 } else {
1012 memset(n->vlans, 0xff, MAX_VLAN >> 3);
1013 }
9711cd0d
JF
1014
1015 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
1016 qapi_event_send_failover_negotiated(n->netclient_name);
e2bde83e 1017 qatomic_set(&n->failover_primary_hidden, false);
9711cd0d
JF
1018 failover_add_primary(n, &err);
1019 if (err) {
1b529d90
LV
1020 if (!qtest_enabled()) {
1021 warn_report_err(err);
1022 } else {
1023 error_free(err);
1024 }
9711cd0d
JF
1025 }
1026 }
fbe78f4f
AL
1027}
1028
002437cd 1029static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
921ac5d0 1030 struct iovec *iov, unsigned int iov_cnt)
002437cd
AL
1031{
1032 uint8_t on;
921ac5d0 1033 size_t s;
b1be4280 1034 NetClientState *nc = qemu_get_queue(n->nic);
002437cd 1035
921ac5d0
MT
1036 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1037 if (s != sizeof(on)) {
1038 return VIRTIO_NET_ERR;
002437cd
AL
1039 }
1040
dd23454b 1041 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
002437cd 1042 n->promisc = on;
dd23454b 1043 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
002437cd 1044 n->allmulti = on;
dd23454b 1045 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
015cb166 1046 n->alluni = on;
dd23454b 1047 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
015cb166 1048 n->nomulti = on;
dd23454b 1049 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
015cb166 1050 n->nouni = on;
dd23454b 1051 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
015cb166 1052 n->nobcast = on;
921ac5d0 1053 } else {
002437cd 1054 return VIRTIO_NET_ERR;
921ac5d0 1055 }
002437cd 1056
b1be4280
AK
1057 rxfilter_notify(nc);
1058
002437cd
AL
1059 return VIRTIO_NET_OK;
1060}
1061
644c9858
DF
1062static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
1063 struct iovec *iov, unsigned int iov_cnt)
1064{
1065 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1066 uint64_t offloads;
1067 size_t s;
1068
95129d6f 1069 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
644c9858
DF
1070 return VIRTIO_NET_ERR;
1071 }
1072
1073 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1074 if (s != sizeof(offloads)) {
1075 return VIRTIO_NET_ERR;
1076 }
1077
1078 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1079 uint64_t supported_offloads;
1080
189ae6bb
JW
1081 offloads = virtio_ldq_p(vdev, &offloads);
1082
644c9858
DF
1083 if (!n->has_vnet_hdr) {
1084 return VIRTIO_NET_ERR;
1085 }
1086
2974e916
YB
1087 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1088 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1089 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1090 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1091 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1092
644c9858
DF
1093 supported_offloads = virtio_net_supported_guest_offloads(n);
1094 if (offloads & ~supported_offloads) {
1095 return VIRTIO_NET_ERR;
1096 }
1097
1098 n->curr_guest_offloads = offloads;
1099 virtio_net_apply_guest_offloads(n);
1100
1101 return VIRTIO_NET_OK;
1102 } else {
1103 return VIRTIO_NET_ERR;
1104 }
1105}
1106
b6503ed9 1107static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
921ac5d0 1108 struct iovec *iov, unsigned int iov_cnt)
b6503ed9 1109{
1399c60d 1110 VirtIODevice *vdev = VIRTIO_DEVICE(n);
b6503ed9 1111 struct virtio_net_ctrl_mac mac_data;
921ac5d0 1112 size_t s;
b1be4280 1113 NetClientState *nc = qemu_get_queue(n->nic);
b6503ed9 1114
c1943a3f
AK
1115 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1116 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1117 return VIRTIO_NET_ERR;
1118 }
1119 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1120 assert(s == sizeof(n->mac));
b356f76d 1121 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
b1be4280
AK
1122 rxfilter_notify(nc);
1123
c1943a3f
AK
1124 return VIRTIO_NET_OK;
1125 }
1126
921ac5d0 1127 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
b6503ed9 1128 return VIRTIO_NET_ERR;
921ac5d0 1129 }
b6503ed9 1130
cae2e556
AK
1131 int in_use = 0;
1132 int first_multi = 0;
1133 uint8_t uni_overflow = 0;
1134 uint8_t multi_overflow = 0;
1135 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
b6503ed9 1136
921ac5d0
MT
1137 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1138 sizeof(mac_data.entries));
1399c60d 1139 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
921ac5d0 1140 if (s != sizeof(mac_data.entries)) {
b1be4280 1141 goto error;
921ac5d0
MT
1142 }
1143 iov_discard_front(&iov, &iov_cnt, s);
b6503ed9 1144
921ac5d0 1145 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
b1be4280 1146 goto error;
921ac5d0 1147 }
b6503ed9
AL
1148
1149 if (mac_data.entries <= MAC_TABLE_ENTRIES) {
cae2e556 1150 s = iov_to_buf(iov, iov_cnt, 0, macs,
921ac5d0
MT
1151 mac_data.entries * ETH_ALEN);
1152 if (s != mac_data.entries * ETH_ALEN) {
b1be4280 1153 goto error;
921ac5d0 1154 }
cae2e556 1155 in_use += mac_data.entries;
b6503ed9 1156 } else {
cae2e556 1157 uni_overflow = 1;
b6503ed9
AL
1158 }
1159
921ac5d0
MT
1160 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1161
cae2e556 1162 first_multi = in_use;
2d9aba39 1163
921ac5d0
MT
1164 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1165 sizeof(mac_data.entries));
1399c60d 1166 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
921ac5d0 1167 if (s != sizeof(mac_data.entries)) {
b1be4280 1168 goto error;
921ac5d0
MT
1169 }
1170
1171 iov_discard_front(&iov, &iov_cnt, s);
b6503ed9 1172
921ac5d0 1173 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
b1be4280 1174 goto error;
921ac5d0 1175 }
b6503ed9 1176
edc24385 1177 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
cae2e556 1178 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
921ac5d0
MT
1179 mac_data.entries * ETH_ALEN);
1180 if (s != mac_data.entries * ETH_ALEN) {
b1be4280 1181 goto error;
8fd2a2f1 1182 }
cae2e556 1183 in_use += mac_data.entries;
921ac5d0 1184 } else {
cae2e556 1185 multi_overflow = 1;
b6503ed9
AL
1186 }
1187
cae2e556
AK
1188 n->mac_table.in_use = in_use;
1189 n->mac_table.first_multi = first_multi;
1190 n->mac_table.uni_overflow = uni_overflow;
1191 n->mac_table.multi_overflow = multi_overflow;
1192 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1193 g_free(macs);
b1be4280
AK
1194 rxfilter_notify(nc);
1195
b6503ed9 1196 return VIRTIO_NET_OK;
b1be4280
AK
1197
1198error:
cae2e556 1199 g_free(macs);
b1be4280 1200 return VIRTIO_NET_ERR;
b6503ed9
AL
1201}
1202
f21c0ed9 1203static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
921ac5d0 1204 struct iovec *iov, unsigned int iov_cnt)
f21c0ed9 1205{
1399c60d 1206 VirtIODevice *vdev = VIRTIO_DEVICE(n);
f21c0ed9 1207 uint16_t vid;
921ac5d0 1208 size_t s;
b1be4280 1209 NetClientState *nc = qemu_get_queue(n->nic);
f21c0ed9 1210
921ac5d0 1211 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1399c60d 1212 vid = virtio_lduw_p(vdev, &vid);
921ac5d0 1213 if (s != sizeof(vid)) {
f21c0ed9
AL
1214 return VIRTIO_NET_ERR;
1215 }
1216
f21c0ed9
AL
1217 if (vid >= MAX_VLAN)
1218 return VIRTIO_NET_ERR;
1219
1220 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1221 n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1222 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1223 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1224 else
1225 return VIRTIO_NET_ERR;
1226
b1be4280
AK
1227 rxfilter_notify(nc);
1228
f21c0ed9
AL
1229 return VIRTIO_NET_OK;
1230}
1231
f57fcf70
JW
1232static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1233 struct iovec *iov, unsigned int iov_cnt)
1234{
9d8c6a25 1235 trace_virtio_net_handle_announce(n->announce_timer.round);
f57fcf70
JW
1236 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1237 n->status & VIRTIO_NET_S_ANNOUNCE) {
1238 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
9d8c6a25
DDAG
1239 if (n->announce_timer.round) {
1240 qemu_announce_timer_step(&n->announce_timer);
f57fcf70
JW
1241 }
1242 return VIRTIO_NET_OK;
1243 } else {
1244 return VIRTIO_NET_ERR;
1245 }
1246}
1247
0145c393
AM
1248static void virtio_net_detach_epbf_rss(VirtIONet *n);
1249
59079029
YB
1250static void virtio_net_disable_rss(VirtIONet *n)
1251{
1252 if (n->rss_data.enabled) {
1253 trace_virtio_net_rss_disable();
1254 }
1255 n->rss_data.enabled = false;
0145c393
AM
1256
1257 virtio_net_detach_epbf_rss(n);
1258}
1259
1260static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1261{
1262 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1263 if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1264 return false;
1265 }
1266
1267 return nc->info->set_steering_ebpf(nc, prog_fd);
1268}
1269
1270static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1271 struct EBPFRSSConfig *config)
1272{
1273 config->redirect = data->redirect;
1274 config->populate_hash = data->populate_hash;
1275 config->hash_types = data->hash_types;
1276 config->indirections_len = data->indirections_len;
1277 config->default_queue = data->default_queue;
1278}
1279
1280static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1281{
1282 struct EBPFRSSConfig config = {};
1283
1284 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1285 return false;
1286 }
1287
1288 rss_data_to_rss_config(&n->rss_data, &config);
1289
1290 if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1291 n->rss_data.indirections_table, n->rss_data.key)) {
1292 return false;
1293 }
1294
1295 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1296 return false;
1297 }
1298
1299 return true;
1300}
1301
1302static void virtio_net_detach_epbf_rss(VirtIONet *n)
1303{
1304 virtio_net_attach_ebpf_to_backend(n->nic, -1);
1305}
1306
1307static bool virtio_net_load_ebpf(VirtIONet *n)
1308{
1309 if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
 1310 /* backend doesn't support steering eBPF */
1311 return false;
1312 }
1313
1314 return ebpf_rss_load(&n->ebpf_rss);
1315}
1316
1317static void virtio_net_unload_ebpf(VirtIONet *n)
1318{
1319 virtio_net_attach_ebpf_to_backend(n->nic, -1);
1320 ebpf_rss_unload(&n->ebpf_rss);
59079029
YB
1321}
1322
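/*
 * Parse a VIRTIO_NET_CTRL_MQ_RSS_CONFIG (do_rss) or _HASH_CONFIG command:
 * validate hash types, indirection table, default queue and key, then enable
 * eBPF or software RSS accordingly. Returns the number of queue pairs to
 * use, or 0 on error.
 */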
1323static uint16_t virtio_net_handle_rss(VirtIONet *n,
e22f0603
YB
1324 struct iovec *iov,
1325 unsigned int iov_cnt,
1326 bool do_rss)
59079029
YB
1327{
1328 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1329 struct virtio_net_rss_config cfg;
1330 size_t s, offset = 0, size_get;
441537f1 1331 uint16_t queue_pairs, i;
59079029
YB
1332 struct {
1333 uint16_t us;
1334 uint8_t b;
1335 } QEMU_PACKED temp;
1336 const char *err_msg = "";
1337 uint32_t err_value = 0;
1338
e22f0603 1339 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
59079029
YB
1340 err_msg = "RSS is not negotiated";
1341 goto error;
1342 }
e22f0603
YB
1343 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1344 err_msg = "Hash report is not negotiated";
1345 goto error;
1346 }
59079029
YB
1347 size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1348 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1349 if (s != size_get) {
1350 err_msg = "Short command buffer";
1351 err_value = (uint32_t)s;
1352 goto error;
1353 }
1354 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1355 n->rss_data.indirections_len =
1356 virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1357 n->rss_data.indirections_len++;
e22f0603
YB
1358 if (!do_rss) {
1359 n->rss_data.indirections_len = 1;
1360 }
59079029
YB
1361 if (!is_power_of_2(n->rss_data.indirections_len)) {
1362 err_msg = "Invalid size of indirection table";
1363 err_value = n->rss_data.indirections_len;
1364 goto error;
1365 }
1366 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1367 err_msg = "Too large indirection table";
1368 err_value = n->rss_data.indirections_len;
1369 goto error;
1370 }
e22f0603
YB
1371 n->rss_data.default_queue = do_rss ?
1372 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
441537f1 1373 if (n->rss_data.default_queue >= n->max_queue_pairs) {
59079029
YB
1374 err_msg = "Invalid default queue";
1375 err_value = n->rss_data.default_queue;
1376 goto error;
1377 }
1378 offset += size_get;
1379 size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1380 g_free(n->rss_data.indirections_table);
1381 n->rss_data.indirections_table = g_malloc(size_get);
1382 if (!n->rss_data.indirections_table) {
1383 err_msg = "Can't allocate indirections table";
1384 err_value = n->rss_data.indirections_len;
1385 goto error;
1386 }
1387 s = iov_to_buf(iov, iov_cnt, offset,
1388 n->rss_data.indirections_table, size_get);
1389 if (s != size_get) {
1390 err_msg = "Short indirection table buffer";
1391 err_value = (uint32_t)s;
1392 goto error;
1393 }
1394 for (i = 0; i < n->rss_data.indirections_len; ++i) {
1395 uint16_t val = n->rss_data.indirections_table[i];
1396 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1397 }
1398 offset += size_get;
1399 size_get = sizeof(temp);
1400 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1401 if (s != size_get) {
441537f1 1402 err_msg = "Can't get queue_pairs";
59079029
YB
1403 err_value = (uint32_t)s;
1404 goto error;
1405 }
441537f1
JW
1406 queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1407 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1408 err_msg = "Invalid number of queue_pairs";
1409 err_value = queue_pairs;
59079029
YB
1410 goto error;
1411 }
1412 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1413 err_msg = "Invalid key size";
1414 err_value = temp.b;
1415 goto error;
1416 }
1417 if (!temp.b && n->rss_data.hash_types) {
1418 err_msg = "No key provided";
1419 err_value = 0;
1420 goto error;
1421 }
1422 if (!temp.b && !n->rss_data.hash_types) {
1423 virtio_net_disable_rss(n);
441537f1 1424 return queue_pairs;
59079029
YB
1425 }
1426 offset += size_get;
1427 size_get = temp.b;
1428 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1429 if (s != size_get) {
 1430 err_msg = "Can't get key buffer";
1431 err_value = (uint32_t)s;
1432 goto error;
1433 }
1434 n->rss_data.enabled = true;
0145c393
AM
1435
1436 if (!n->rss_data.populate_hash) {
1437 if (!virtio_net_attach_epbf_rss(n)) {
1438 /* EBPF must be loaded for vhost */
1439 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1440 warn_report("Can't load eBPF RSS for vhost");
1441 goto error;
1442 }
1443 /* fallback to software RSS */
1444 warn_report("Can't load eBPF RSS - fallback to software RSS");
1445 n->rss_data.enabled_software_rss = true;
1446 }
1447 } else {
 1448 /* use software RSS for hash population */
 1449 /* and detach eBPF if it was loaded before */
1450 virtio_net_detach_epbf_rss(n);
1451 n->rss_data.enabled_software_rss = true;
1452 }
1453
59079029
YB
1454 trace_virtio_net_rss_enable(n->rss_data.hash_types,
1455 n->rss_data.indirections_len,
1456 temp.b);
441537f1 1457 return queue_pairs;
59079029
YB
1458error:
1459 trace_virtio_net_rss_error(err_msg, err_value);
1460 virtio_net_disable_rss(n);
1461 return 0;
1462}
1463
fed699f9 1464static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
f8f7c533 1465 struct iovec *iov, unsigned int iov_cnt)
fed699f9 1466{
17a0ca55 1467 VirtIODevice *vdev = VIRTIO_DEVICE(n);
441537f1 1468 uint16_t queue_pairs;
2a7888cc 1469 NetClientState *nc = qemu_get_queue(n->nic);
fed699f9 1470
59079029 1471 virtio_net_disable_rss(n);
e22f0603 1472 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
441537f1
JW
1473 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1474 return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
e22f0603 1475 }
59079029 1476 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
441537f1 1477 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
59079029
YB
1478 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1479 struct virtio_net_ctrl_mq mq;
1480 size_t s;
1481 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1482 return VIRTIO_NET_ERR;
1483 }
1484 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1485 if (s != sizeof(mq)) {
1486 return VIRTIO_NET_ERR;
1487 }
441537f1 1488 queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
fed699f9 1489
59079029 1490 } else {
fed699f9
JW
1491 return VIRTIO_NET_ERR;
1492 }
1493
441537f1
JW
1494 if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1495 queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1496 queue_pairs > n->max_queue_pairs ||
fed699f9
JW
1497 !n->multiqueue) {
1498 return VIRTIO_NET_ERR;
1499 }
1500
ca8717f9 1501 n->curr_queue_pairs = queue_pairs;
2a7888cc 1502 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
ca8717f9
EP
1503 /*
1504 * Avoid updating the backend for a vdpa device: We're only interested
1505 * in updating the device model queues.
1506 */
1507 return VIRTIO_NET_OK;
2a7888cc 1508 }
441537f1 1509 /* stop the backend before changing the number of queue_pairs to avoid handling a
fed699f9 1510 * disabled queue */
17a0ca55 1511 virtio_net_set_status(vdev, vdev->status);
441537f1 1512 virtio_net_set_queue_pairs(n);
fed699f9
JW
1513
1514 return VIRTIO_NET_OK;
1515}
ba7eadb5 1516
640b8a1c
EP
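/*
 * A control-queue request carries a virtio_net_ctrl_hdr (class + command) in
 * the out sg, followed by command-specific data; the device writes a single
 * ack byte (VIRTIO_NET_OK/VIRTIO_NET_ERR) into the in sg and returns the
 * number of bytes written.
 */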
1517size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1518 const struct iovec *in_sg, unsigned in_num,
1519 const struct iovec *out_sg,
1520 unsigned out_num)
3d11d36c 1521{
17a0ca55 1522 VirtIONet *n = VIRTIO_NET(vdev);
3d11d36c
AL
1523 struct virtio_net_ctrl_hdr ctrl;
1524 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
921ac5d0 1525 size_t s;
771b6ed3 1526 struct iovec *iov, *iov2;
640b8a1c
EP
1527
1528 if (iov_size(in_sg, in_num) < sizeof(status) ||
1529 iov_size(out_sg, out_num) < sizeof(ctrl)) {
1530 virtio_error(vdev, "virtio-net ctrl missing headers");
1531 return 0;
1532 }
1533
1534 iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1535 s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1536 iov_discard_front(&iov, &out_num, sizeof(ctrl));
1537 if (s != sizeof(ctrl)) {
1538 status = VIRTIO_NET_ERR;
1539 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1540 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1541 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1542 status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1543 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1544 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1545 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1546 status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1547 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1548 status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1549 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1550 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1551 }
1552
1553 s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1554 assert(s == sizeof(status));
1555
1556 g_free(iov2);
1557 return sizeof(status);
1558}
1559
1560static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1561{
1562 VirtQueueElement *elem;
3d11d36c 1563
51b19ebe 1564 for (;;) {
640b8a1c 1565 size_t written;
51b19ebe
PB
1566 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1567 if (!elem) {
1568 break;
1569 }
640b8a1c
EP
1570
1571 written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1572 elem->out_sg, elem->out_num);
1573 if (written > 0) {
1574 virtqueue_push(vq, elem, written);
1575 virtio_notify(vdev, vq);
1576 g_free(elem);
1577 } else {
ba7eadb5
GK
1578 virtqueue_detach_element(vq, elem, 0);
1579 g_free(elem);
1580 break;
3d11d36c 1581 }
3d11d36c
AL
1582 }
1583}
1584
fbe78f4f
AL
1585/* RX */
1586
1587static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1588{
17a0ca55 1589 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 1590 int queue_index = vq2q(virtio_get_queue_index(vq));
8aeff62d 1591
fed699f9 1592 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
fbe78f4f
AL
1593}
1594
b8c4b67e 1595static bool virtio_net_can_receive(NetClientState *nc)
fbe78f4f 1596{
cc1f0f45 1597 VirtIONet *n = qemu_get_nic_opaque(nc);
17a0ca55 1598 VirtIODevice *vdev = VIRTIO_DEVICE(n);
fed699f9 1599 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
0c87e93e 1600
17a0ca55 1601 if (!vdev->vm_running) {
b8c4b67e 1602 return false;
95477323 1603 }
cdd5cc12 1604
441537f1 1605 if (nc->queue_index >= n->curr_queue_pairs) {
b8c4b67e 1606 return false;
fed699f9
JW
1607 }
1608
0c87e93e 1609 if (!virtio_queue_ready(q->rx_vq) ||
17a0ca55 1610 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
b8c4b67e 1611 return false;
0c87e93e 1612 }
fbe78f4f 1613
b8c4b67e 1614 return true;
cdd5cc12
MM
1615}
1616
0c87e93e 1617static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
cdd5cc12 1618{
0c87e93e
JW
1619 VirtIONet *n = q->n;
1620 if (virtio_queue_empty(q->rx_vq) ||
fbe78f4f 1621 (n->mergeable_rx_bufs &&
0c87e93e
JW
1622 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1623 virtio_queue_set_notification(q->rx_vq, 1);
06b12970
TL
1624
1625 /* To avoid a race condition where the guest has made some buffers
1626 * available after the above check but before notification was
1627 * enabled, check for available buffers again.
1628 */
0c87e93e 1629 if (virtio_queue_empty(q->rx_vq) ||
06b12970 1630 (n->mergeable_rx_bufs &&
0c87e93e 1631 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
06b12970 1632 return 0;
0c87e93e 1633 }
fbe78f4f
AL
1634 }
1635
0c87e93e 1636 virtio_queue_set_notification(q->rx_vq, 0);
fbe78f4f
AL
1637 return 1;
1638}
1639
1399c60d 1640static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
032a74a1 1641{
1399c60d
RR
1642 virtio_tswap16s(vdev, &hdr->hdr_len);
1643 virtio_tswap16s(vdev, &hdr->gso_size);
1644 virtio_tswap16s(vdev, &hdr->csum_start);
1645 virtio_tswap16s(vdev, &hdr->csum_offset);
032a74a1
CLG
1646}
1647
1d41b0c1
AL
1648/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1649 * it never finds out that the packets don't have valid checksums. This
1650 * causes dhclient to get upset. Fedora's carried a patch for ages to
1651 * fix this with Xen but it hasn't appeared in an upstream release of
1652 * dhclient yet.
1653 *
1654 * To avoid breaking existing guests, we catch udp packets and add
1655 * checksums. This is terrible but it's better than hacking the guest
1656 * kernels.
1657 *
1658 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1659 * we should provide a mechanism to disable it to avoid polluting the host
1660 * cache.
1661 */
1662static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
22cc84db 1663 uint8_t *buf, size_t size)
1d41b0c1
AL
1664{
1665 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1666 (size > 27 && size < 1500) && /* normal sized MTU */
1667 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1668 (buf[23] == 17) && /* ip.protocol == UDP */
1669 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
f5746335 1670 net_checksum_calculate(buf, size, CSUM_UDP);
1d41b0c1
AL
1671 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1672 }
1673}
1674
280598b7
MT
1675static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1676 const void *buf, size_t size)
fbe78f4f 1677{
3a330134 1678 if (n->has_vnet_hdr) {
22cc84db
MT
1679 /* FIXME this cast is evil */
1680 void *wbuf = (void *)buf;
280598b7
MT
1681 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1682 size - n->host_hdr_len);
1bfa316c
GK
1683
1684 if (n->needs_vnet_hdr_swap) {
1685 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1686 }
280598b7 1687 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
22cc84db
MT
1688 } else {
1689 struct virtio_net_hdr hdr = {
1690 .flags = 0,
1691 .gso_type = VIRTIO_NET_HDR_GSO_NONE
1692 };
1693 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
3a330134 1694 }
fbe78f4f
AL
1695}
1696
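/*
 * Acceptance logic for receive_filter() below: promiscuous mode accepts
 * everything; a VLAN-tagged frame must have its VID set in the vlans[]
 * bitmap; multicast frames are matched against the multicast portion of
 * the MAC table (honouring nobcast/nomulti/allmulti and table overflow);
 * unicast frames are matched against our own MAC and the unicast portion
 * of the table. Returns 1 to accept the packet, 0 to drop it.
 */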
3831ab20
AL
1697static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1698{
1699 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
f21c0ed9 1700 static const uint8_t vlan[] = {0x81, 0x00};
3831ab20 1701 uint8_t *ptr = (uint8_t *)buf;
b6503ed9 1702 int i;
3831ab20
AL
1703
1704 if (n->promisc)
1705 return 1;
1706
e043ebc6 1707 ptr += n->host_hdr_len;
3a330134 1708
f21c0ed9 1709 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
7542d3e7 1710 int vid = lduw_be_p(ptr + 14) & 0xfff;
f21c0ed9
AL
1711 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1712 return 0;
1713 }
1714
bbe2f399
AW
1715 if (ptr[0] & 1) { // multicast
1716 if (!memcmp(ptr, bcast, sizeof(bcast))) {
015cb166
AW
1717 return !n->nobcast;
1718 } else if (n->nomulti) {
1719 return 0;
8fd2a2f1 1720 } else if (n->allmulti || n->mac_table.multi_overflow) {
bbe2f399
AW
1721 return 1;
1722 }
2d9aba39
AW
1723
1724 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1725 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1726 return 1;
1727 }
1728 }
bbe2f399 1729 } else { // unicast
015cb166
AW
1730 if (n->nouni) {
1731 return 0;
1732 } else if (n->alluni || n->mac_table.uni_overflow) {
8fd2a2f1
AW
1733 return 1;
1734 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
bbe2f399
AW
1735 return 1;
1736 }
3831ab20 1737
2d9aba39
AW
1738 for (i = 0; i < n->mac_table.first_multi; i++) {
1739 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1740 return 1;
1741 }
1742 }
b6503ed9
AL
1743 }
1744
3831ab20
AL
1745 return 0;
1746}
1747
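/*
 * RSS hash type selection: the most specific enabled type wins. For an
 * IPv4 TCP segment, for example, NetPktRssIpV4Tcp is returned only when
 * the guest enabled VIRTIO_NET_RSS_HASH_TYPE_TCPv4; otherwise the code
 * falls back to the plain IPv4 type, and 0xff means no usable hash type.
 */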
69ff5ef8
AO
1748static uint8_t virtio_net_get_hash_type(bool hasip4,
1749 bool hasip6,
65f474bb 1750 EthL4HdrProto l4hdr_proto,
4474e37a
YB
1751 uint32_t types)
1752{
69ff5ef8 1753 if (hasip4) {
65f474bb
AO
1754 switch (l4hdr_proto) {
1755 case ETH_L4_HDR_PROTO_TCP:
1756 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1757 return NetPktRssIpV4Tcp;
1758 }
1759 break;
1760
1761 case ETH_L4_HDR_PROTO_UDP:
1762 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1763 return NetPktRssIpV4Udp;
1764 }
1765 break;
1766
1767 default:
1768 break;
4474e37a 1769 }
65f474bb 1770
4474e37a
YB
1771 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1772 return NetPktRssIpV4;
1773 }
69ff5ef8 1774 } else if (hasip6) {
65f474bb
AO
1775 switch (l4hdr_proto) {
1776 case ETH_L4_HDR_PROTO_TCP:
1777 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1778 return NetPktRssIpV6TcpEx;
1779 }
1780 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1781 return NetPktRssIpV6Tcp;
1782 }
1783 break;
4474e37a 1784
65f474bb
AO
1785 case ETH_L4_HDR_PROTO_UDP:
1786 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1787 return NetPktRssIpV6UdpEx;
1788 }
1789 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1790 return NetPktRssIpV6Udp;
1791 }
1792 break;
1793
1794 default:
1795 break;
4474e37a 1796 }
65f474bb
AO
1797
1798 if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1799 return NetPktRssIpV6Ex;
4474e37a 1800 }
65f474bb
AO
1801 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1802 return NetPktRssIpV6;
4474e37a
YB
1803 }
1804 }
1805 return 0xff;
1806}
1807
e22f0603
YB
1808static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1809 uint32_t hash)
1810{
1811 struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1812 hdr->hash_value = hash;
1813 hdr->hash_report = report;
1814}
1815
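/*
 * Software RSS: parse the packet, pick a hash type from the enabled
 * rss_data.hash_types, compute the RSS hash with rss_data.key and, when
 * redirection is enabled, pick the destination queue as
 *   indirections_table[hash & (indirections_len - 1)].
 * Returns the new queue index, or -1 if the packet stays on this queue.
 */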
4474e37a
YB
1816static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1817 size_t size)
1818{
1819 VirtIONet *n = qemu_get_nic_opaque(nc);
e22f0603 1820 unsigned int index = nc->queue_index, new_index = index;
4474e37a
YB
1821 struct NetRxPkt *pkt = n->rx_pkt;
1822 uint8_t net_hash_type;
1823 uint32_t hash;
65f474bb
AO
1824 bool hasip4, hasip6;
1825 EthL4HdrProto l4hdr_proto;
e22f0603
YB
1826 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1827 VIRTIO_NET_HASH_REPORT_IPv4,
1828 VIRTIO_NET_HASH_REPORT_TCPv4,
1829 VIRTIO_NET_HASH_REPORT_TCPv6,
1830 VIRTIO_NET_HASH_REPORT_IPv6,
1831 VIRTIO_NET_HASH_REPORT_IPv6_EX,
1832 VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1833 VIRTIO_NET_HASH_REPORT_UDPv4,
1834 VIRTIO_NET_HASH_REPORT_UDPv6,
1835 VIRTIO_NET_HASH_REPORT_UDPv6_EX
1836 };
2f0fa232
AO
1837 struct iovec iov = {
1838 .iov_base = (void *)buf,
1839 .iov_len = size
1840 };
4474e37a 1841
2f0fa232 1842 net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
65f474bb
AO
1843 net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1844 net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
4474e37a
YB
1845 n->rss_data.hash_types);
1846 if (net_hash_type > NetPktRssIpV6UdpEx) {
e22f0603
YB
1847 if (n->rss_data.populate_hash) {
1848 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1849 }
1850 return n->rss_data.redirect ? n->rss_data.default_queue : -1;
4474e37a
YB
1851 }
1852
1853 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
e22f0603
YB
1854
1855 if (n->rss_data.populate_hash) {
1856 virtio_set_packet_hash(buf, reports[net_hash_type], hash);
4474e37a 1857 }
e22f0603
YB
1858
1859 if (n->rss_data.redirect) {
1860 new_index = hash & (n->rss_data.indirections_len - 1);
1861 new_index = n->rss_data.indirections_table[new_index];
1862 }
1863
1864 return (index == new_index) ? -1 : new_index;
4474e37a
YB
1865}
1866
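/*
 * Receive path: optionally redirect through software RSS, check that the
 * rx virtqueue can hold the packet plus the guest header, apply the
 * MAC/VLAN filter, then copy the data into one or more guest buffers
 * (chaining is only allowed with mergeable rx buffers) before filling and
 * flushing the virtqueue and notifying the guest.
 */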
97cd965c 1867static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
4474e37a 1868 size_t size, bool no_rss)
fbe78f4f 1869{
cc1f0f45 1870 VirtIONet *n = qemu_get_nic_opaque(nc);
fed699f9 1871 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
17a0ca55 1872 VirtIODevice *vdev = VIRTIO_DEVICE(n);
bedd7e93
JW
1873 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1874 size_t lens[VIRTQUEUE_MAX_SIZE];
63c58728
MT
1875 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1876 struct virtio_net_hdr_mrg_rxbuf mhdr;
1877 unsigned mhdr_cnt = 0;
bedd7e93
JW
1878 size_t offset, i, guest_offset, j;
1879 ssize_t err;
fbe78f4f 1880
fed699f9 1881 if (!virtio_net_can_receive(nc)) {
cdd5cc12 1882 return -1;
b356f76d 1883 }
cdd5cc12 1884
0145c393 1885 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
4474e37a
YB
1886 int index = virtio_net_process_rss(nc, buf, size);
1887 if (index >= 0) {
1888 NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1889 return virtio_net_receive_rcu(nc2, buf, size, true);
1890 }
1891 }
1892
940cda94 1893 /* hdr_len refers to the header we supply to the guest */
0c87e93e 1894 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
8aeff62d 1895 return 0;
0c87e93e 1896 }
fbe78f4f 1897
3831ab20 1898 if (!receive_filter(n, buf, size))
4f1c942b 1899 return size;
3831ab20 1900
fbe78f4f
AL
1901 offset = i = 0;
1902
1903 while (offset < size) {
51b19ebe 1904 VirtQueueElement *elem;
fbe78f4f 1905 int len, total;
51b19ebe 1906 const struct iovec *sg;
fbe78f4f 1907
22c253d9 1908 total = 0;
fbe78f4f 1909
bedd7e93
JW
1910 if (i == VIRTQUEUE_MAX_SIZE) {
1911 virtio_error(vdev, "virtio-net unexpected long buffer chain");
1912 err = size;
1913 goto err;
1914 }
1915
51b19ebe
PB
1916 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1917 if (!elem) {
ba10b9c0
GK
1918 if (i) {
1919 virtio_error(vdev, "virtio-net unexpected empty queue: "
1920 "i %zd mergeable %d offset %zd, size %zd, "
1921 "guest hdr len %zd, host hdr len %zd "
1922 "guest features 0x%" PRIx64,
1923 i, n->mergeable_rx_bufs, offset, size,
1924 n->guest_hdr_len, n->host_hdr_len,
1925 vdev->guest_features);
1926 }
bedd7e93
JW
1927 err = -1;
1928 goto err;
fbe78f4f
AL
1929 }
1930
51b19ebe 1931 if (elem->in_num < 1) {
ba10b9c0
GK
1932 virtio_error(vdev,
1933 "virtio-net receive queue contains no in buffers");
1934 virtqueue_detach_element(q->rx_vq, elem, 0);
1935 g_free(elem);
bedd7e93
JW
1936 err = -1;
1937 goto err;
fbe78f4f
AL
1938 }
1939
51b19ebe 1940 sg = elem->in_sg;
fbe78f4f 1941 if (i == 0) {
c8d28e7e 1942 assert(offset == 0);
63c58728
MT
1943 if (n->mergeable_rx_bufs) {
1944 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
51b19ebe 1945 sg, elem->in_num,
63c58728
MT
1946 offsetof(typeof(mhdr), num_buffers),
1947 sizeof(mhdr.num_buffers));
1948 }
fbe78f4f 1949
51b19ebe 1950 receive_header(n, sg, elem->in_num, buf, size);
e22f0603
YB
1951 if (n->rss_data.populate_hash) {
1952 offset = sizeof(mhdr);
1953 iov_from_buf(sg, elem->in_num, offset,
1954 buf + offset, n->host_hdr_len - sizeof(mhdr));
1955 }
c8d28e7e 1956 offset = n->host_hdr_len;
e35e23f6 1957 total += n->guest_hdr_len;
22cc84db
MT
1958 guest_offset = n->guest_hdr_len;
1959 } else {
1960 guest_offset = 0;
fbe78f4f
AL
1961 }
1962
1963 /* copy in packet. ugh */
51b19ebe 1964 len = iov_from_buf(sg, elem->in_num, guest_offset,
dcf6f5e1 1965 buf + offset, size - offset);
fbe78f4f 1966 total += len;
279a4253
MT
1967 offset += len;
1968 /* If buffers can't be merged, at this point we
1969 * must have consumed the complete packet.
1970 * Otherwise, drop it. */
1971 if (!n->mergeable_rx_bufs && offset < size) {
27e57efe 1972 virtqueue_unpop(q->rx_vq, elem, total);
51b19ebe 1973 g_free(elem);
bedd7e93
JW
1974 err = size;
1975 goto err;
279a4253 1976 }
fbe78f4f 1977
bedd7e93
JW
1978 elems[i] = elem;
1979 lens[i] = total;
1980 i++;
fbe78f4f
AL
1981 }
1982
63c58728 1983 if (mhdr_cnt) {
1399c60d 1984 virtio_stw_p(vdev, &mhdr.num_buffers, i);
63c58728
MT
1985 iov_from_buf(mhdr_sg, mhdr_cnt,
1986 0,
1987 &mhdr.num_buffers, sizeof mhdr.num_buffers);
44b15bc5 1988 }
fbe78f4f 1989
bedd7e93
JW
1990 for (j = 0; j < i; j++) {
1991 /* signal other side */
1992 virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1993 g_free(elems[j]);
1994 }
1995
0c87e93e 1996 virtqueue_flush(q->rx_vq, i);
17a0ca55 1997 virtio_notify(vdev, q->rx_vq);
4f1c942b
MM
1998
1999 return size;
bedd7e93
JW
2000
2001err:
2002 for (j = 0; j < i; j++) {
abe300d9 2003 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
bedd7e93
JW
2004 g_free(elems[j]);
2005 }
2006
2007 return err;
fbe78f4f
AL
2008}
2009
2974e916 2010static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
97cd965c
PB
2011 size_t size)
2012{
068ddfa9 2013 RCU_READ_LOCK_GUARD();
97cd965c 2014
4474e37a 2015 return virtio_net_receive_rcu(nc, buf, size, false);
97cd965c
PB
2016}
2017
2974e916
YB
2018static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2019 const uint8_t *buf,
2020 VirtioNetRscUnit *unit)
2021{
2022 uint16_t ip_hdrlen;
2023 struct ip_header *ip;
2024
2025 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2026 + sizeof(struct eth_header));
2027 unit->ip = (void *)ip;
2028 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2029 unit->ip_plen = &ip->ip_len;
2030 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
2031 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2032 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
2033}
2034
2035static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2036 const uint8_t *buf,
2037 VirtioNetRscUnit *unit)
2038{
2039 struct ip6_header *ip6;
2040
2041 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2042 + sizeof(struct eth_header));
2043 unit->ip = ip6;
2044 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
78ee6bd0 2045 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2974e916
YB
2046 + sizeof(struct ip6_header));
2047 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2048
2049 /* Unlike ipv4, the payload length in ipv6 does not include the ip
2050 header, so only the tcp header length is subtracted here */
2051 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2052}
2053
2054static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2055 VirtioNetRscSeg *seg)
2056{
2057 int ret;
dd3d85e8 2058 struct virtio_net_hdr_v1 *h;
2974e916 2059
dd3d85e8 2060 h = (struct virtio_net_hdr_v1 *)seg->buf;
2974e916
YB
2061 h->flags = 0;
2062 h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2063
2064 if (seg->is_coalesced) {
dd3d85e8
YB
2065 h->rsc.segments = seg->packets;
2066 h->rsc.dup_acks = seg->dup_ack;
2974e916
YB
2067 h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2068 if (chain->proto == ETH_P_IP) {
2069 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2070 } else {
2071 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2072 }
2073 }
2074
2075 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2076 QTAILQ_REMOVE(&chain->buffers, seg, next);
2077 g_free(seg->buf);
2078 g_free(seg);
2079
2080 return ret;
2081}
2082
2083static void virtio_net_rsc_purge(void *opq)
2084{
2085 VirtioNetRscSeg *seg, *rn;
2086 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2087
2088 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2089 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2090 chain->stat.purge_failed++;
2091 continue;
2092 }
2093 }
2094
2095 chain->stat.timer++;
2096 if (!QTAILQ_EMPTY(&chain->buffers)) {
2097 timer_mod(chain->drain_timer,
2098 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2099 }
2100}
2101
2102static void virtio_net_rsc_cleanup(VirtIONet *n)
2103{
2104 VirtioNetRscChain *chain, *rn_chain;
2105 VirtioNetRscSeg *seg, *rn_seg;
2106
2107 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2108 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2109 QTAILQ_REMOVE(&chain->buffers, seg, next);
2110 g_free(seg->buf);
2111 g_free(seg);
2112 }
2113
2974e916
YB
2114 timer_free(chain->drain_timer);
2115 QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2116 g_free(chain);
2117 }
2118}
2119
2120static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2121 NetClientState *nc,
2122 const uint8_t *buf, size_t size)
2123{
2124 uint16_t hdr_len;
2125 VirtioNetRscSeg *seg;
2126
2127 hdr_len = chain->n->guest_hdr_len;
b21e2380 2128 seg = g_new(VirtioNetRscSeg, 1);
2974e916
YB
2129 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2130 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2131 memcpy(seg->buf, buf, size);
2132 seg->size = size;
2133 seg->packets = 1;
2134 seg->dup_ack = 0;
2135 seg->is_coalesced = 0;
2136 seg->nc = nc;
2137
2138 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2139 chain->stat.cache++;
2140
2141 switch (chain->proto) {
2142 case ETH_P_IP:
2143 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2144 break;
2145 case ETH_P_IPV6:
2146 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2147 break;
2148 default:
2149 g_assert_not_reached();
2150 }
2151}
2152
2153static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2154 VirtioNetRscSeg *seg,
2155 const uint8_t *buf,
2156 struct tcp_header *n_tcp,
2157 struct tcp_header *o_tcp)
2158{
2159 uint32_t nack, oack;
2160 uint16_t nwin, owin;
2161
2162 nack = htonl(n_tcp->th_ack);
2163 nwin = htons(n_tcp->th_win);
2164 oack = htonl(o_tcp->th_ack);
2165 owin = htons(o_tcp->th_win);
2166
2167 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2168 chain->stat.ack_out_of_win++;
2169 return RSC_FINAL;
2170 } else if (nack == oack) {
2171 /* duplicated ack or window probe */
2172 if (nwin == owin) {
2173 /* duplicated ack: count it, the whql test expects up to 1 dup ack */
2174 chain->stat.dup_ack++;
2175 return RSC_FINAL;
2176 } else {
2177 /* Coalesce window update */
2178 o_tcp->th_win = n_tcp->th_win;
2179 chain->stat.win_update++;
2180 return RSC_COALESCE;
2181 }
2182 } else {
2183 /* pure ack, go to 'C', finalize */
2184 chain->stat.pure_ack++;
2185 return RSC_FINAL;
2186 }
2187}
2188
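/*
 * Coalescing decision for an in-sequence candidate: when the new sequence
 * number equals the cached one, the packet is either fresh data after a
 * pure ack (coalesced) or an ack/window update handled by
 * virtio_net_rsc_handle_ack(); when the sequence delta does not match the
 * cached payload length the data is out of order and the segment is
 * finalized; otherwise the payload is appended and the IP length updated.
 */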
2189static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2190 VirtioNetRscSeg *seg,
2191 const uint8_t *buf,
2192 VirtioNetRscUnit *n_unit)
2193{
2194 void *data;
2195 uint16_t o_ip_len;
2196 uint32_t nseq, oseq;
2197 VirtioNetRscUnit *o_unit;
2198
2199 o_unit = &seg->unit;
2200 o_ip_len = htons(*o_unit->ip_plen);
2201 nseq = htonl(n_unit->tcp->th_seq);
2202 oseq = htonl(o_unit->tcp->th_seq);
2203
2204 /* out of order or retransmitted. */
2205 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2206 chain->stat.data_out_of_win++;
2207 return RSC_FINAL;
2208 }
2209
2210 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2211 if (nseq == oseq) {
2212 if ((o_unit->payload == 0) && n_unit->payload) {
2213 /* From no payload to payload: the normal case, not a dup ack etc. */
2214 chain->stat.data_after_pure_ack++;
2215 goto coalesce;
2216 } else {
2217 return virtio_net_rsc_handle_ack(chain, seg, buf,
2218 n_unit->tcp, o_unit->tcp);
2219 }
2220 } else if ((nseq - oseq) != o_unit->payload) {
2221 /* Not a consistent packet, out of order */
2222 chain->stat.data_out_of_order++;
2223 return RSC_FINAL;
2224 } else {
2225coalesce:
2226 if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2227 chain->stat.over_size++;
2228 return RSC_FINAL;
2229 }
2230
2231 /* The data is in order; the payload length field differs between v4
2232 and v6, so use the field value to update and record the new data len */
2233 o_unit->payload += n_unit->payload; /* update new data len */
2234
2235 /* update field in ip header */
2236 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2237
2238 /* Carry the 'PUSH' bit over: the whql test guide says 'PUSH' can be
2239 coalesced for a windows guest, while this may change the behavior of a
2240 linux guest (only if it uses the RSC feature). */
2241 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2242
2243 o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2244 o_unit->tcp->th_win = n_unit->tcp->th_win;
2245
2246 memmove(seg->buf + seg->size, data, n_unit->payload);
2247 seg->size += n_unit->payload;
2248 seg->packets++;
2249 chain->stat.coalesced++;
2250 return RSC_COALESCE;
2251 }
2252}
2253
2254static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2255 VirtioNetRscSeg *seg,
2256 const uint8_t *buf, size_t size,
2257 VirtioNetRscUnit *unit)
2258{
2259 struct ip_header *ip1, *ip2;
2260
2261 ip1 = (struct ip_header *)(unit->ip);
2262 ip2 = (struct ip_header *)(seg->unit.ip);
2263 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2264 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2265 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2266 chain->stat.no_match++;
2267 return RSC_NO_MATCH;
2268 }
2269
2270 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2271}
2272
2273static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2274 VirtioNetRscSeg *seg,
2275 const uint8_t *buf, size_t size,
2276 VirtioNetRscUnit *unit)
2277{
2278 struct ip6_header *ip1, *ip2;
2279
2280 ip1 = (struct ip6_header *)(unit->ip);
2281 ip2 = (struct ip6_header *)(seg->unit.ip);
2282 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2283 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2284 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2285 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2286 chain->stat.no_match++;
2287 return RSC_NO_MATCH;
2288 }
2289
2290 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2291}
2292
2293/* Packets with 'SYN' bypass coalescing; packets with other control flags
2294 * are sent only after a drain, to prevent out-of-order delivery */
2295static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2296 struct tcp_header *tcp)
2297{
2298 uint16_t tcp_hdr;
2299 uint16_t tcp_flag;
2300
2301 tcp_flag = htons(tcp->th_offset_flags);
2302 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2303 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2974e916
YB
2304 if (tcp_flag & TH_SYN) {
2305 chain->stat.tcp_syn++;
2306 return RSC_BYPASS;
2307 }
2308
2309 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2310 chain->stat.tcp_ctrl_drain++;
2311 return RSC_FINAL;
2312 }
2313
2314 if (tcp_hdr > sizeof(struct tcp_header)) {
2315 chain->stat.tcp_all_opt++;
2316 return RSC_FINAL;
2317 }
2318
2319 return RSC_CANDIDATE;
2320}
2321
2322static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2323 NetClientState *nc,
2324 const uint8_t *buf, size_t size,
2325 VirtioNetRscUnit *unit)
2326{
2327 int ret;
2328 VirtioNetRscSeg *seg, *nseg;
2329
2330 if (QTAILQ_EMPTY(&chain->buffers)) {
2331 chain->stat.empty_cache++;
2332 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2333 timer_mod(chain->drain_timer,
2334 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2335 return size;
2336 }
2337
2338 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2339 if (chain->proto == ETH_P_IP) {
2340 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2341 } else {
2342 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2343 }
2344
2345 if (ret == RSC_FINAL) {
2346 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2347 /* Send failed */
2348 chain->stat.final_failed++;
2349 return 0;
2350 }
2351
2352 /* Send current packet */
2353 return virtio_net_do_receive(nc, buf, size);
2354 } else if (ret == RSC_NO_MATCH) {
2355 continue;
2356 } else {
2357 /* Coalesced; set the flag so the checksum is recalculated for ipv4 */
2358 seg->is_coalesced = 1;
2359 return size;
2360 }
2361 }
2362
2363 chain->stat.no_match_cache++;
2364 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2365 return size;
2366}
2367
2368/* Drain a connection's cached data; this avoids out-of-order segments */
2369static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2370 NetClientState *nc,
2371 const uint8_t *buf, size_t size,
2372 uint16_t ip_start, uint16_t ip_size,
2373 uint16_t tcp_port)
2374{
2375 VirtioNetRscSeg *seg, *nseg;
2376 uint32_t ppair1, ppair2;
2377
2378 ppair1 = *(uint32_t *)(buf + tcp_port);
2379 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2380 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2381 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2382 || (ppair1 != ppair2)) {
2383 continue;
2384 }
2385 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2386 chain->stat.drain_failed++;
2387 }
2388
2389 break;
2390 }
2391
2392 return virtio_net_do_receive(nc, buf, size);
2393}
2394
2395static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2396 struct ip_header *ip,
2397 const uint8_t *buf, size_t size)
2398{
2399 uint16_t ip_len;
2400
2401 /* Not an ipv4 packet */
2402 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2403 chain->stat.ip_option++;
2404 return RSC_BYPASS;
2405 }
2406
2407 /* Don't handle packets with ip option */
2408 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2409 chain->stat.ip_option++;
2410 return RSC_BYPASS;
2411 }
2412
2413 if (ip->ip_p != IPPROTO_TCP) {
2414 chain->stat.bypass_not_tcp++;
2415 return RSC_BYPASS;
2416 }
2417
2418 /* Don't handle packets with ip fragment */
2419 if (!(htons(ip->ip_off) & IP_DF)) {
2420 chain->stat.ip_frag++;
2421 return RSC_BYPASS;
2422 }
2423
2424 /* Don't handle packets with ecn flag */
2425 if (IPTOS_ECN(ip->ip_tos)) {
2426 chain->stat.ip_ecn++;
2427 return RSC_BYPASS;
2428 }
2429
2430 ip_len = htons(ip->ip_len);
2431 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2432 || ip_len > (size - chain->n->guest_hdr_len -
2433 sizeof(struct eth_header))) {
2434 chain->stat.ip_hacked++;
2435 return RSC_BYPASS;
2436 }
2437
2438 return RSC_CANDIDATE;
2439}
2440
2441static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2442 NetClientState *nc,
2443 const uint8_t *buf, size_t size)
2444{
2445 int32_t ret;
2446 uint16_t hdr_len;
2447 VirtioNetRscUnit unit;
2448
2449 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2450
2451 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2452 + sizeof(struct tcp_header))) {
2453 chain->stat.bypass_not_tcp++;
2454 return virtio_net_do_receive(nc, buf, size);
2455 }
2456
2457 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2458 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2459 != RSC_CANDIDATE) {
2460 return virtio_net_do_receive(nc, buf, size);
2461 }
2462
2463 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2464 if (ret == RSC_BYPASS) {
2465 return virtio_net_do_receive(nc, buf, size);
2466 } else if (ret == RSC_FINAL) {
2467 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2468 ((hdr_len + sizeof(struct eth_header)) + 12),
2469 VIRTIO_NET_IP4_ADDR_SIZE,
2470 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2471 }
2472
2473 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2474}
2475
2476static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2477 struct ip6_header *ip6,
2478 const uint8_t *buf, size_t size)
2479{
2480 uint16_t ip_len;
2481
2482 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2483 != IP_HEADER_VERSION_6) {
2484 return RSC_BYPASS;
2485 }
2486
2487 /* Both options and the protocol are checked by this test */
2488 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2489 chain->stat.bypass_not_tcp++;
2490 return RSC_BYPASS;
2491 }
2492
2493 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2494 if (ip_len < sizeof(struct tcp_header) ||
2495 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2496 - sizeof(struct ip6_header))) {
2497 chain->stat.ip_hacked++;
2498 return RSC_BYPASS;
2499 }
2500
2501 /* Don't handle packets with ecn flag */
2502 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2503 chain->stat.ip_ecn++;
2504 return RSC_BYPASS;
2505 }
2506
2507 return RSC_CANDIDATE;
2508}
2509
2510static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2511 const uint8_t *buf, size_t size)
2512{
2513 int32_t ret;
2514 uint16_t hdr_len;
2515 VirtioNetRscChain *chain;
2516 VirtioNetRscUnit unit;
2517
3d558330 2518 chain = opq;
2974e916
YB
2519 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2520
2521 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2522 + sizeof(tcp_header))) {
2523 return virtio_net_do_receive(nc, buf, size);
2524 }
2525
2526 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2527 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2528 unit.ip, buf, size)) {
2529 return virtio_net_do_receive(nc, buf, size);
2530 }
2531
2532 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2533 if (ret == RSC_BYPASS) {
2534 return virtio_net_do_receive(nc, buf, size);
2535 } else if (ret == RSC_FINAL) {
2536 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2537 ((hdr_len + sizeof(struct eth_header)) + 8),
2538 VIRTIO_NET_IP6_ADDR_SIZE,
2539 hdr_len + sizeof(struct eth_header)
2540 + sizeof(struct ip6_header));
2541 }
2542
2543 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2544}
2545
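/*
 * One RSC chain is kept per L3 protocol (ETH_P_IP / ETH_P_IPV6); chains
 * are created lazily on first use and each carries its own max payload,
 * drain timer and statistics.
 */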
2546static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2547 NetClientState *nc,
2548 uint16_t proto)
2549{
2550 VirtioNetRscChain *chain;
2551
2552 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2553 return NULL;
2554 }
2555
2556 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2557 if (chain->proto == proto) {
2558 return chain;
2559 }
2560 }
2561
2562 chain = g_malloc(sizeof(*chain));
2563 chain->n = n;
2564 chain->proto = proto;
2565 if (proto == (uint16_t)ETH_P_IP) {
2566 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2567 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2568 } else {
2569 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2570 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2571 }
2572 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2573 virtio_net_rsc_purge, chain);
2574 memset(&chain->stat, 0, sizeof(chain->stat));
2575
2576 QTAILQ_INIT(&chain->buffers);
2577 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2578
2579 return chain;
2580}
2581
2582static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2583 const uint8_t *buf,
2584 size_t size)
2585{
2586 uint16_t proto;
2587 VirtioNetRscChain *chain;
2588 struct eth_header *eth;
2589 VirtIONet *n;
2590
2591 n = qemu_get_nic_opaque(nc);
2592 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2593 return virtio_net_do_receive(nc, buf, size);
2594 }
2595
2596 eth = (struct eth_header *)(buf + n->guest_hdr_len);
2597 proto = htons(eth->h_proto);
2598
2599 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2600 if (chain) {
2601 chain->stat.received++;
2602 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2603 return virtio_net_rsc_receive4(chain, nc, buf, size);
2604 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2605 return virtio_net_rsc_receive6(chain, nc, buf, size);
2606 }
2607 }
2608 return virtio_net_do_receive(nc, buf, size);
2609}
2610
2611static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2612 size_t size)
2613{
2614 VirtIONet *n = qemu_get_nic_opaque(nc);
2615 if ((n->rsc4_enabled || n->rsc6_enabled)) {
2616 return virtio_net_rsc_receive(nc, buf, size);
2617 } else {
2618 return virtio_net_do_receive(nc, buf, size);
2619 }
2620}
2621
0c87e93e 2622static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
6243375f 2623
4e68f7a0 2624static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
6243375f 2625{
cc1f0f45 2626 VirtIONet *n = qemu_get_nic_opaque(nc);
fed699f9 2627 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
17a0ca55 2628 VirtIODevice *vdev = VIRTIO_DEVICE(n);
df8d0708 2629 int ret;
6243375f 2630
51b19ebe 2631 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
17a0ca55 2632 virtio_notify(vdev, q->tx_vq);
6243375f 2633
51b19ebe
PB
2634 g_free(q->async_tx.elem);
2635 q->async_tx.elem = NULL;
6243375f 2636
0c87e93e 2637 virtio_queue_set_notification(q->tx_vq, 1);
df8d0708 2638 ret = virtio_net_flush_tx(q);
7550a822 2639 if (ret >= n->tx_burst) {
df8d0708
LV
2640 /*
2641 * the flush has been stopped by tx_burst;
2642 * we will not receive a notification for the
2643 * remaining part, so re-schedule
2644 */
2645 virtio_queue_set_notification(q->tx_vq, 0);
7550a822
LV
2646 if (q->tx_bh) {
2647 qemu_bh_schedule(q->tx_bh);
2648 } else {
2649 timer_mod(q->tx_timer,
2650 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2651 }
df8d0708
LV
2652 q->tx_waiting = 1;
2653 }
6243375f
MM
2654}
2655
fbe78f4f 2656/* TX */
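/*
 * virtio_net_flush_tx() returns the number of packets flushed (bounded by
 * tx_burst), -EBUSY while an asynchronous send is still in flight, or
 * -EINVAL when the guest supplied a malformed header; callers use the
 * ">= tx_burst" case to decide whether to reschedule themselves.
 */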
0c87e93e 2657static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
fbe78f4f 2658{
0c87e93e 2659 VirtIONet *n = q->n;
17a0ca55 2660 VirtIODevice *vdev = VIRTIO_DEVICE(n);
51b19ebe 2661 VirtQueueElement *elem;
e3f30488 2662 int32_t num_packets = 0;
fed699f9 2663 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
17a0ca55 2664 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
e3f30488
AW
2665 return num_packets;
2666 }
fbe78f4f 2667
51b19ebe 2668 if (q->async_tx.elem) {
0c87e93e 2669 virtio_queue_set_notification(q->tx_vq, 0);
e3f30488 2670 return num_packets;
6243375f
MM
2671 }
2672
51b19ebe 2673 for (;;) {
bd89dd98 2674 ssize_t ret;
51b19ebe
PB
2675 unsigned int out_num;
2676 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
feb93f36 2677 struct virtio_net_hdr_mrg_rxbuf mhdr;
fbe78f4f 2678
51b19ebe
PB
2679 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2680 if (!elem) {
2681 break;
2682 }
2683
2684 out_num = elem->out_num;
2685 out_sg = elem->out_sg;
7b80d08e 2686 if (out_num < 1) {
fa5e56c2
GK
2687 virtio_error(vdev, "virtio-net header not in first element");
2688 virtqueue_detach_element(q->tx_vq, elem, 0);
2689 g_free(elem);
2690 return -EINVAL;
fbe78f4f
AL
2691 }
2692
032a74a1 2693 if (n->has_vnet_hdr) {
feb93f36
JW
2694 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2695 n->guest_hdr_len) {
fa5e56c2
GK
2696 virtio_error(vdev, "virtio-net header incorrect");
2697 virtqueue_detach_element(q->tx_vq, elem, 0);
2698 g_free(elem);
2699 return -EINVAL;
032a74a1 2700 }
1bfa316c 2701 if (n->needs_vnet_hdr_swap) {
feb93f36
JW
2702 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2703 sg2[0].iov_base = &mhdr;
2704 sg2[0].iov_len = n->guest_hdr_len;
2705 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2706 out_sg, out_num,
2707 n->guest_hdr_len, -1);
2708 if (out_num == VIRTQUEUE_MAX_SIZE) {
2709 goto drop;
7d37435b 2710 }
feb93f36
JW
2711 out_num += 1;
2712 out_sg = sg2;
7d37435b 2713 }
032a74a1 2714 }
14761f9c
MT
2715 /*
2716 * If host wants to see the guest header as is, we can
2717 * pass it on unchanged. Otherwise, copy just the parts
2718 * that host is interested in.
2719 */
2720 assert(n->host_hdr_len <= n->guest_hdr_len);
2721 if (n->host_hdr_len != n->guest_hdr_len) {
2722 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2723 out_sg, out_num,
2724 0, n->host_hdr_len);
2725 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2726 out_sg, out_num,
2727 n->guest_hdr_len, -1);
2728 out_num = sg_num;
2729 out_sg = sg;
fbe78f4f
AL
2730 }
2731
fed699f9
JW
2732 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2733 out_sg, out_num, virtio_net_tx_complete);
6243375f 2734 if (ret == 0) {
0c87e93e
JW
2735 virtio_queue_set_notification(q->tx_vq, 0);
2736 q->async_tx.elem = elem;
e3f30488 2737 return -EBUSY;
6243375f
MM
2738 }
2739
feb93f36 2740drop:
51b19ebe 2741 virtqueue_push(q->tx_vq, elem, 0);
17a0ca55 2742 virtio_notify(vdev, q->tx_vq);
51b19ebe 2743 g_free(elem);
e3f30488
AW
2744
2745 if (++num_packets >= n->tx_burst) {
2746 break;
2747 }
fbe78f4f 2748 }
e3f30488 2749 return num_packets;
fbe78f4f
AL
2750}
2751
7550a822
LV
2752static void virtio_net_tx_timer(void *opaque);
2753
a697a334 2754static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
fbe78f4f 2755{
17a0ca55 2756 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2757 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
fbe78f4f 2758
283e2c2a
YB
2759 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2760 virtio_net_drop_tx_queue_data(vdev, vq);
2761 return;
2762 }
2763
783e7706 2764 /* This happens when device was stopped but VCPU wasn't. */
17a0ca55 2765 if (!vdev->vm_running) {
0c87e93e 2766 q->tx_waiting = 1;
783e7706
MT
2767 return;
2768 }
2769
0c87e93e 2770 if (q->tx_waiting) {
7550a822 2771 /* We already have queued packets, immediately flush */
bc72ad67 2772 timer_del(q->tx_timer);
7550a822 2773 virtio_net_tx_timer(q);
fbe78f4f 2774 } else {
7550a822 2775 /* re-arm timer to flush it (and more) on next tick */
bc72ad67 2776 timer_mod(q->tx_timer,
7550a822 2777 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
0c87e93e 2778 q->tx_waiting = 1;
fbe78f4f
AL
2779 virtio_queue_set_notification(vq, 0);
2780 }
2781}
2782
a697a334
AW
2783static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2784{
17a0ca55 2785 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2786 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
a697a334 2787
283e2c2a
YB
2788 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2789 virtio_net_drop_tx_queue_data(vdev, vq);
2790 return;
2791 }
2792
0c87e93e 2793 if (unlikely(q->tx_waiting)) {
a697a334
AW
2794 return;
2795 }
0c87e93e 2796 q->tx_waiting = 1;
783e7706 2797 /* This happens when device was stopped but VCPU wasn't. */
17a0ca55 2798 if (!vdev->vm_running) {
783e7706
MT
2799 return;
2800 }
a697a334 2801 virtio_queue_set_notification(vq, 0);
0c87e93e 2802 qemu_bh_schedule(q->tx_bh);
a697a334
AW
2803}
2804
fbe78f4f
AL
2805static void virtio_net_tx_timer(void *opaque)
2806{
0c87e93e
JW
2807 VirtIONetQueue *q = opaque;
2808 VirtIONet *n = q->n;
17a0ca55 2809 VirtIODevice *vdev = VIRTIO_DEVICE(n);
7550a822
LV
2810 int ret;
2811
e8bcf842
MT
2812 /* This happens when device was stopped but BH wasn't. */
2813 if (!vdev->vm_running) {
2814 /* Make sure tx waiting is set, so we'll run when restarted. */
2815 assert(q->tx_waiting);
2816 return;
2817 }
fbe78f4f 2818
0c87e93e 2819 q->tx_waiting = 0;
fbe78f4f
AL
2820
2821 /* Just in case the driver is not ready anymore */
17a0ca55 2822 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
fbe78f4f 2823 return;
17a0ca55 2824 }
fbe78f4f 2825
7550a822
LV
2826 ret = virtio_net_flush_tx(q);
2827 if (ret == -EBUSY || ret == -EINVAL) {
2828 return;
2829 }
2830 /*
2831 * If we flush a full burst of packets, assume there are
2832 * more coming and immediately rearm
2833 */
2834 if (ret >= n->tx_burst) {
2835 q->tx_waiting = 1;
2836 timer_mod(q->tx_timer,
2837 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2838 return;
2839 }
2840 /*
2841 * If less than a full burst, re-enable notification and flush
2842 * anything that may have come in while we weren't looking. If
2843 * we find something, assume the guest is still active and rearm
2844 */
0c87e93e 2845 virtio_queue_set_notification(q->tx_vq, 1);
7550a822
LV
2846 ret = virtio_net_flush_tx(q);
2847 if (ret > 0) {
2848 virtio_queue_set_notification(q->tx_vq, 0);
2849 q->tx_waiting = 1;
2850 timer_mod(q->tx_timer,
2851 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2852 }
fbe78f4f
AL
2853}
2854
a697a334
AW
2855static void virtio_net_tx_bh(void *opaque)
2856{
0c87e93e
JW
2857 VirtIONetQueue *q = opaque;
2858 VirtIONet *n = q->n;
17a0ca55 2859 VirtIODevice *vdev = VIRTIO_DEVICE(n);
a697a334
AW
2860 int32_t ret;
2861
e8bcf842
MT
2862 /* This happens when device was stopped but BH wasn't. */
2863 if (!vdev->vm_running) {
2864 /* Make sure tx waiting is set, so we'll run when restarted. */
2865 assert(q->tx_waiting);
2866 return;
2867 }
783e7706 2868
0c87e93e 2869 q->tx_waiting = 0;
a697a334
AW
2870
2871 /* Just in case the driver is not ready anymore */
17a0ca55 2872 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
a697a334 2873 return;
17a0ca55 2874 }
a697a334 2875
0c87e93e 2876 ret = virtio_net_flush_tx(q);
fa5e56c2
GK
2877 if (ret == -EBUSY || ret == -EINVAL) {
2878 return; /* Notification re-enable handled by tx_complete or device
2879 * broken */
a697a334
AW
2880 }
2881
2882 /* If we flush a full burst of packets, assume there are
2883 * more coming and immediately reschedule */
2884 if (ret >= n->tx_burst) {
0c87e93e
JW
2885 qemu_bh_schedule(q->tx_bh);
2886 q->tx_waiting = 1;
a697a334
AW
2887 return;
2888 }
2889
2890 /* If less than a full burst, re-enable notification and flush
2891 * anything that may have come in while we weren't looking. If
2892 * we find something, assume the guest is still active and reschedule */
0c87e93e 2893 virtio_queue_set_notification(q->tx_vq, 1);
fa5e56c2
GK
2894 ret = virtio_net_flush_tx(q);
2895 if (ret == -EINVAL) {
2896 return;
2897 } else if (ret > 0) {
0c87e93e
JW
2898 virtio_queue_set_notification(q->tx_vq, 0);
2899 qemu_bh_schedule(q->tx_bh);
2900 q->tx_waiting = 1;
a697a334
AW
2901 }
2902}
2903
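/*
 * Virtqueue layout: queue pair N uses virtqueue 2*N for rx and 2*N+1 for
 * tx, and the control virtqueue is always last. This is why
 * virtio_net_del_queue() removes index * 2 and index * 2 + 1, and why
 * virtio_net_change_num_queue_pairs() removes and re-adds the ctrl vq
 * whenever the pair count changes.
 */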
f9d6dbf0
WC
2904static void virtio_net_add_queue(VirtIONet *n, int index)
2905{
2906 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2907
1c0fbfa3
MT
2908 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2909 virtio_net_handle_rx);
9b02e161 2910
f9d6dbf0
WC
2911 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2912 n->vqs[index].tx_vq =
9b02e161
WW
2913 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2914 virtio_net_handle_tx_timer);
f9d6dbf0
WC
2915 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2916 virtio_net_tx_timer,
2917 &n->vqs[index]);
2918 } else {
2919 n->vqs[index].tx_vq =
9b02e161
WW
2920 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2921 virtio_net_handle_tx_bh);
f63192b0
AB
2922 n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2923 &DEVICE(vdev)->mem_reentrancy_guard);
f9d6dbf0
WC
2924 }
2925
2926 n->vqs[index].tx_waiting = 0;
2927 n->vqs[index].n = n;
2928}
2929
2930static void virtio_net_del_queue(VirtIONet *n, int index)
2931{
2932 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2933 VirtIONetQueue *q = &n->vqs[index];
2934 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2935
2936 qemu_purge_queued_packets(nc);
2937
2938 virtio_del_queue(vdev, index * 2);
2939 if (q->tx_timer) {
f9d6dbf0 2940 timer_free(q->tx_timer);
f989c30c 2941 q->tx_timer = NULL;
f9d6dbf0
WC
2942 } else {
2943 qemu_bh_delete(q->tx_bh);
f989c30c 2944 q->tx_bh = NULL;
f9d6dbf0 2945 }
f989c30c 2946 q->tx_waiting = 0;
f9d6dbf0
WC
2947 virtio_del_queue(vdev, index * 2 + 1);
2948}
2949
441537f1 2950static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
f9d6dbf0
WC
2951{
2952 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2953 int old_num_queues = virtio_get_num_queues(vdev);
441537f1 2954 int new_num_queues = new_max_queue_pairs * 2 + 1;
f9d6dbf0
WC
2955 int i;
2956
2957 assert(old_num_queues >= 3);
2958 assert(old_num_queues % 2 == 1);
2959
2960 if (old_num_queues == new_num_queues) {
2961 return;
2962 }
2963
2964 /*
2965 * We always need to remove and add ctrl vq if
2966 * old_num_queues != new_num_queues. Remove ctrl_vq first,
20f86a75 2967 * and then we only enter one of the following two loops.
f9d6dbf0
WC
2968 */
2969 virtio_del_queue(vdev, old_num_queues - 1);
2970
2971 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2972 /* new_num_queues < old_num_queues */
2973 virtio_net_del_queue(n, i / 2);
2974 }
2975
2976 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2977 /* new_num_queues > old_num_queues */
2978 virtio_net_add_queue(n, i / 2);
2979 }
2980
2981 /* add ctrl_vq last */
2982 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2983}
2984
ec57db16 2985static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
fed699f9 2986{
441537f1 2987 int max = multiqueue ? n->max_queue_pairs : 1;
f9d6dbf0 2988
fed699f9 2989 n->multiqueue = multiqueue;
441537f1 2990 virtio_net_change_num_queue_pairs(n, max);
fed699f9 2991
441537f1 2992 virtio_net_set_queue_pairs(n);
fed699f9
JW
2993}
2994
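/*
 * Post-load fixups that cannot be expressed as plain vmstate fields:
 * re-derive the merged-rx-buffer/header configuration from the negotiated
 * features, reset an oversized MAC table, recompute first_multi, propagate
 * link_down to every subqueue, restart the announce timer and re-attach
 * (or fall back from) eBPF RSS.
 */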
982b78c5 2995static int virtio_net_post_load_device(void *opaque, int version_id)
037dab2f 2996{
982b78c5
DDAG
2997 VirtIONet *n = opaque;
2998 VirtIODevice *vdev = VIRTIO_DEVICE(n);
037dab2f 2999 int i, link_down;
fbe78f4f 3000
9d8c6a25 3001 trace_virtio_net_post_load_device();
982b78c5 3002 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
95129d6f 3003 virtio_vdev_has_feature(vdev,
e22f0603
YB
3004 VIRTIO_F_VERSION_1),
3005 virtio_vdev_has_feature(vdev,
3006 VIRTIO_NET_F_HASH_REPORT));
fbe78f4f 3007
76010cb3 3008 /* MAC_TABLE_ENTRIES may be different from the saved image */
982b78c5 3009 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
76010cb3 3010 n->mac_table.in_use = 0;
b6503ed9 3011 }
0ce0e8f4 3012
982b78c5 3013 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
6c666823
MT
3014 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3015 }
3016
7788c3f2
MS
3017 /*
3018 * curr_guest_offloads will be later overwritten by the
3019 * virtio_set_features_nocheck call done from the virtio_load.
3020 * Here we make sure it is preserved and restored accordingly
3021 * in the virtio_net_post_load_virtio callback.
3022 */
3023 n->saved_guest_offloads = n->curr_guest_offloads;
6c666823 3024
441537f1 3025 virtio_net_set_queue_pairs(n);
5f800801 3026
2d9aba39
AW
3027 /* Find the first multicast entry in the saved MAC filter */
3028 for (i = 0; i < n->mac_table.in_use; i++) {
3029 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3030 break;
3031 }
3032 }
3033 n->mac_table.first_multi = i;
98991481
AK
3034
3035 /* nc.link_down can't be migrated, so infer link_down according
3036 * to link status bit in n->status */
5f800801 3037 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
441537f1 3038 for (i = 0; i < n->max_queue_pairs; i++) {
5f800801
JW
3039 qemu_get_subqueue(n->nic, i)->link_down = link_down;
3040 }
98991481 3041
6c666823
MT
3042 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3043 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
9d8c6a25
DDAG
3044 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3045 QEMU_CLOCK_VIRTUAL,
3046 virtio_net_announce_timer, n);
3047 if (n->announce_timer.round) {
3048 timer_mod(n->announce_timer.tm,
3049 qemu_clock_get_ms(n->announce_timer.type));
3050 } else {
944458b6 3051 qemu_announce_timer_del(&n->announce_timer, false);
9d8c6a25 3052 }
6c666823
MT
3053 }
3054
e41b7114 3055 if (n->rss_data.enabled) {
0145c393
AM
3056 n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
3057 if (!n->rss_data.populate_hash) {
3058 if (!virtio_net_attach_epbf_rss(n)) {
3059 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
3060 warn_report("Can't post-load eBPF RSS for vhost");
3061 } else {
3062 warn_report("Can't post-load eBPF RSS - "
3063 "fallback to software RSS");
3064 n->rss_data.enabled_software_rss = true;
3065 }
3066 }
3067 }
3068
e41b7114
YB
3069 trace_virtio_net_rss_enable(n->rss_data.hash_types,
3070 n->rss_data.indirections_len,
3071 sizeof(n->rss_data.key));
3072 } else {
3073 trace_virtio_net_rss_disable();
3074 }
fbe78f4f
AL
3075 return 0;
3076}
3077
7788c3f2
MS
3078static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3079{
3080 VirtIONet *n = VIRTIO_NET(vdev);
3081 /*
3082 * The actual needed state is now in saved_guest_offloads,
3083 * see virtio_net_post_load_device for detail.
3084 * Restore it back and apply the desired offloads.
3085 */
3086 n->curr_guest_offloads = n->saved_guest_offloads;
3087 if (peer_has_vnet_hdr(n)) {
3088 virtio_net_apply_guest_offloads(n);
3089 }
3090
3091 return 0;
3092}
3093
982b78c5
DDAG
3094/* tx_waiting field of a VirtIONetQueue */
3095static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3096 .name = "virtio-net-queue-tx_waiting",
3097 .fields = (VMStateField[]) {
3098 VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3099 VMSTATE_END_OF_LIST()
3100 },
3101};
3102
441537f1 3103static bool max_queue_pairs_gt_1(void *opaque, int version_id)
982b78c5 3104{
441537f1 3105 return VIRTIO_NET(opaque)->max_queue_pairs > 1;
982b78c5
DDAG
3106}
3107
3108static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3109{
3110 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3111 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3112}
3113
3114static bool mac_table_fits(void *opaque, int version_id)
3115{
3116 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3117}
3118
3119static bool mac_table_doesnt_fit(void *opaque, int version_id)
3120{
3121 return !mac_table_fits(opaque, version_id);
3122}
3123
3124/* This temporary type is shared by all the WITH_TMP methods
3125 * although only some fields are used by each.
3126 */
3127struct VirtIONetMigTmp {
3128 VirtIONet *parent;
3129 VirtIONetQueue *vqs_1;
441537f1 3130 uint16_t curr_queue_pairs_1;
982b78c5
DDAG
3131 uint8_t has_ufo;
3132 uint32_t has_vnet_hdr;
3133};
3134
3135/* The 2nd and subsequent tx_waiting flags are loaded later than
441537f1 3136 * the 1st entry in the queue_pairs and only if there's more than one
982b78c5
DDAG
3137 * entry. We use the tmp mechanism to calculate a temporary
3138 * pointer and count and also validate the count.
3139 */
3140
44b1ff31 3141static int virtio_net_tx_waiting_pre_save(void *opaque)
982b78c5
DDAG
3142{
3143 struct VirtIONetMigTmp *tmp = opaque;
3144
3145 tmp->vqs_1 = tmp->parent->vqs + 1;
441537f1
JW
3146 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3147 if (tmp->parent->curr_queue_pairs == 0) {
3148 tmp->curr_queue_pairs_1 = 0;
982b78c5 3149 }
44b1ff31
DDAG
3150
3151 return 0;
982b78c5
DDAG
3152}
3153
3154static int virtio_net_tx_waiting_pre_load(void *opaque)
3155{
3156 struct VirtIONetMigTmp *tmp = opaque;
3157
3158 /* Reuse the pointer setup from save */
3159 virtio_net_tx_waiting_pre_save(opaque);
3160
441537f1
JW
3161 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3162 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3163 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
982b78c5
DDAG
3164
3165 return -EINVAL;
3166 }
3167
3168 return 0; /* all good */
3169}
3170
3171static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3172 .name = "virtio-net-tx_waiting",
3173 .pre_load = virtio_net_tx_waiting_pre_load,
3174 .pre_save = virtio_net_tx_waiting_pre_save,
3175 .fields = (VMStateField[]) {
3176 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
441537f1 3177 curr_queue_pairs_1,
982b78c5
DDAG
3178 vmstate_virtio_net_queue_tx_waiting,
3179 struct VirtIONetQueue),
3180 VMSTATE_END_OF_LIST()
3181 },
3182};
3183
3184/* the 'has_ufo' flag is just tested; if the incoming stream has the
3185 * flag set we need to check that we have it
3186 */
3187static int virtio_net_ufo_post_load(void *opaque, int version_id)
3188{
3189 struct VirtIONetMigTmp *tmp = opaque;
3190
3191 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3192 error_report("virtio-net: saved image requires TUN_F_UFO support");
3193 return -EINVAL;
3194 }
3195
3196 return 0;
3197}
3198
44b1ff31 3199static int virtio_net_ufo_pre_save(void *opaque)
982b78c5
DDAG
3200{
3201 struct VirtIONetMigTmp *tmp = opaque;
3202
3203 tmp->has_ufo = tmp->parent->has_ufo;
44b1ff31
DDAG
3204
3205 return 0;
982b78c5
DDAG
3206}
3207
3208static const VMStateDescription vmstate_virtio_net_has_ufo = {
3209 .name = "virtio-net-ufo",
3210 .post_load = virtio_net_ufo_post_load,
3211 .pre_save = virtio_net_ufo_pre_save,
3212 .fields = (VMStateField[]) {
3213 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3214 VMSTATE_END_OF_LIST()
3215 },
3216};
3217
3218/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3219 * flag set we need to check that we have it
3220 */
3221static int virtio_net_vnet_post_load(void *opaque, int version_id)
3222{
3223 struct VirtIONetMigTmp *tmp = opaque;
3224
3225 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3226 error_report("virtio-net: saved image requires vnet_hdr=on");
3227 return -EINVAL;
3228 }
3229
3230 return 0;
3231}
3232
44b1ff31 3233static int virtio_net_vnet_pre_save(void *opaque)
982b78c5
DDAG
3234{
3235 struct VirtIONetMigTmp *tmp = opaque;
3236
3237 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
44b1ff31
DDAG
3238
3239 return 0;
982b78c5
DDAG
3240}
3241
3242static const VMStateDescription vmstate_virtio_net_has_vnet = {
3243 .name = "virtio-net-vnet",
3244 .post_load = virtio_net_vnet_post_load,
3245 .pre_save = virtio_net_vnet_pre_save,
3246 .fields = (VMStateField[]) {
3247 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3248 VMSTATE_END_OF_LIST()
3249 },
3250};
3251
e41b7114
YB
3252static bool virtio_net_rss_needed(void *opaque)
3253{
3254 return VIRTIO_NET(opaque)->rss_data.enabled;
3255}
3256
3257static const VMStateDescription vmstate_virtio_net_rss = {
3258 .name = "virtio-net-device/rss",
3259 .version_id = 1,
3260 .minimum_version_id = 1,
3261 .needed = virtio_net_rss_needed,
3262 .fields = (VMStateField[]) {
3263 VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3264 VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3265 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3266 VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3267 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3268 VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3269 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3270 VIRTIO_NET_RSS_MAX_KEY_SIZE),
3271 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3272 rss_data.indirections_len, 0,
3273 vmstate_info_uint16, uint16_t),
3274 VMSTATE_END_OF_LIST()
3275 },
3276};
3277
982b78c5
DDAG
3278static const VMStateDescription vmstate_virtio_net_device = {
3279 .name = "virtio-net-device",
3280 .version_id = VIRTIO_NET_VM_VERSION,
3281 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3282 .post_load = virtio_net_post_load_device,
3283 .fields = (VMStateField[]) {
3284 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3285 VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3286 vmstate_virtio_net_queue_tx_waiting,
3287 VirtIONetQueue),
3288 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3289 VMSTATE_UINT16(status, VirtIONet),
3290 VMSTATE_UINT8(promisc, VirtIONet),
3291 VMSTATE_UINT8(allmulti, VirtIONet),
3292 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3293
3294 /* Guarded pair: If it fits we load it, else we throw it away
3295 * - can happen if the source has a larger MAC table; post-load
3296 * sets flags in this case.
3297 */
3298 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3299 0, mac_table_fits, mac_table.in_use,
3300 ETH_ALEN),
3301 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3302 mac_table.in_use, ETH_ALEN),
3303
3304 /* Note: This is an array of uint32's that's always been saved as a
3305 * buffer; hold onto your endiannesses; it's actually used as a bitmap
3306 * but based on the uint.
3307 */
3308 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3309 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3310 vmstate_virtio_net_has_vnet),
3311 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3312 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3313 VMSTATE_UINT8(alluni, VirtIONet),
3314 VMSTATE_UINT8(nomulti, VirtIONet),
3315 VMSTATE_UINT8(nouni, VirtIONet),
3316 VMSTATE_UINT8(nobcast, VirtIONet),
3317 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3318 vmstate_virtio_net_has_ufo),
441537f1 3319 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
982b78c5 3320 vmstate_info_uint16_equal, uint16_t),
441537f1 3321 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
982b78c5
DDAG
3322 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3323 vmstate_virtio_net_tx_waiting),
3324 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3325 has_ctrl_guest_offloads),
3326 VMSTATE_END_OF_LIST()
3327 },
e41b7114
YB
3328 .subsections = (const VMStateDescription * []) {
3329 &vmstate_virtio_net_rss,
3330 NULL
3331 }
982b78c5
DDAG
3332};
3333
eb6b6c12 3334static NetClientInfo net_virtio_info = {
f394b2e2 3335 .type = NET_CLIENT_DRIVER_NIC,
eb6b6c12
MM
3336 .size = sizeof(NICState),
3337 .can_receive = virtio_net_can_receive,
3338 .receive = virtio_net_receive,
eb6b6c12 3339 .link_status_changed = virtio_net_set_link_status,
b1be4280 3340 .query_rx_filter = virtio_net_query_rxfilter,
b2c929f0 3341 .announce = virtio_net_announce,
eb6b6c12
MM
3342};
3343
f56a1247
MT
3344static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3345{
17a0ca55 3346 VirtIONet *n = VIRTIO_NET(vdev);
68b0a639 3347 NetClientState *nc;
f56a1247 3348 assert(n->vhost_started);
68b0a639
SWL
3349 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3350 /* Must guard against invalid features and bogus queue index
3351 * from being set by malicious guest, or penetrated through
3352 * buggy migration stream.
3353 */
3354 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3355 qemu_log_mask(LOG_GUEST_ERROR,
3356 "%s: bogus vq index ignored\n", __func__);
3357 return false;
3358 }
3359 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3360 } else {
3361 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3362 }
544f0278
CL
3363 /*
3364 * Check for the configure interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
3365 * used as the macro for the configure interrupt's index. If this is
3366 * not supported, the function will return false.
3367 */
3368
3369 if (idx == VIRTIO_CONFIG_IRQ_IDX) {
8aab0d1d 3370 return vhost_net_config_pending(get_vhost_net(nc->peer));
544f0278 3371 }
ed8b4afe 3372 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
f56a1247
MT
3373}
3374
3375static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3376 bool mask)
3377{
17a0ca55 3378 VirtIONet *n = VIRTIO_NET(vdev);
68b0a639 3379 NetClientState *nc;
f56a1247 3380 assert(n->vhost_started);
68b0a639
SWL
3381 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3382 /* Must guard against invalid features and bogus queue index
3383 * from being set by a malicious guest, or slipping in through a
3384 * buggy migration stream.
3385 */
3386 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3387 qemu_log_mask(LOG_GUEST_ERROR,
3388 "%s: bogus vq index ignored\n", __func__);
3389 return;
3390 }
3391 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3392 } else {
3393 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3394 }
544f0278
CL
3395 /*
3396 * Check for the configure interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
3397 * used as the macro for the configure interrupt's index. If this is
3398 * not supported, the function will simply return.
3399 */
3400
3401 if (idx == VIRTIO_CONFIG_IRQ_IDX) {
8aab0d1d 3402 vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
544f0278
CL
3403 return;
3404 }
544f0278 3405 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
f56a1247
MT
3406}
3407
019a3edb 3408static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
fbe78f4f 3409{
0cd09c3a 3410 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
a93e599d 3411
d74c30c8 3412 n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
17ec5a86
FK
3413}
3414
8a253ec2
FK
3415void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3416 const char *type)
3417{
3418 /*
3419 * The name can be NULL; in that case the netclient name will be type.x.
3420 */
3421 assert(type != NULL);
3422
9e288406 3423 g_free(n->netclient_name);
9e288406 3424 g_free(n->netclient_type);
80e0090a 3425 n->netclient_name = g_strdup(name);
8a253ec2
FK
3426 n->netclient_type = g_strdup(type);
3427}
3428
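/*
 * Request hot-unplug of the failover primary device.  The device is
 * marked partially_hotplugged so it can be plugged back later.
 * Returns false if there is no hotplug handler or the unplug request
 * fails.
 */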
0e9a65c5 3429static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
9711cd0d
JF
3430{
3431 HotplugHandler *hotplug_ctrl;
3432 PCIDevice *pci_dev;
3433 Error *err = NULL;
3434
0e9a65c5 3435 hotplug_ctrl = qdev_get_hotplug_handler(dev);
9711cd0d 3436 if (hotplug_ctrl) {
0e9a65c5 3437 pci_dev = PCI_DEVICE(dev);
9711cd0d 3438 pci_dev->partially_hotplugged = true;
0e9a65c5 3439 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
9711cd0d
JF
3440 if (err) {
3441 error_report_err(err);
3442 return false;
3443 }
3444 } else {
3445 return false;
3446 }
3447 return true;
3448}
3449
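/*
 * Re-plug a previously (partially) unplugged failover primary device:
 * reattach it to its bus, clear failover_primary_hidden and run the
 * hotplug handler's pre_plug/plug callbacks again.
 */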
0e9a65c5
JQ
3450static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3451 Error **errp)
9711cd0d 3452{
5a0948d3 3453 Error *err = NULL;
9711cd0d 3454 HotplugHandler *hotplug_ctrl;
0e9a65c5 3455 PCIDevice *pdev = PCI_DEVICE(dev);
78274682 3456 BusState *primary_bus;
9711cd0d
JF
3457
3458 if (!pdev->partially_hotplugged) {
3459 return true;
3460 }
0e9a65c5 3461 primary_bus = dev->parent_bus;
78274682 3462 if (!primary_bus) {
150ab54a 3463 error_setg(errp, "virtio_net: couldn't find primary bus");
5a0948d3 3464 return false;
9711cd0d 3465 }
0e9a65c5 3466 qdev_set_parent_bus(dev, primary_bus, &error_abort);
e2bde83e 3467 qatomic_set(&n->failover_primary_hidden, false);
0e9a65c5 3468 hotplug_ctrl = qdev_get_hotplug_handler(dev);
150ab54a 3469 if (hotplug_ctrl) {
0e9a65c5 3470 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
5a0948d3
MA
3471 if (err) {
3472 goto out;
3473 }
0e9a65c5 3474 hotplug_handler_plug(hotplug_ctrl, dev, &err);
150ab54a 3475 }
109c20ea 3476 pdev->partially_hotplugged = false;
150ab54a
JF
3477
3478out:
5a0948d3
MA
3479 error_propagate(errp, err);
3480 return !err;
9711cd0d
JF
3481}
3482
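/*
 * React to migration state changes for the failover primary device:
 * unplug it when migration setup starts and plug it back if the
 * migration fails.
 */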
07a5d816 3483static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
9711cd0d
JF
3484{
3485 bool should_be_hidden;
3486 Error *err = NULL;
07a5d816 3487 DeviceState *dev = failover_find_primary_device(n);
9711cd0d 3488
07a5d816
JQ
3489 if (!dev) {
3490 return;
9711cd0d
JF
3491 }
3492
07a5d816
JQ
3493 should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3494
4dbac1ae 3495 if (migration_in_setup(s) && !should_be_hidden) {
07a5d816
JQ
3496 if (failover_unplug_primary(n, dev)) {
3497 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3498 qapi_event_send_unplug_primary(dev->id);
e2bde83e 3499 qatomic_set(&n->failover_primary_hidden, true);
9711cd0d
JF
3500 } else {
3501 warn_report("couldn't unplug primary device");
3502 }
3503 } else if (migration_has_failed(s)) {
150ab54a 3504 /* We already unplugged the device; let's plug it back. */
07a5d816 3505 if (!failover_replug_primary(n, dev, &err)) {
9711cd0d
JF
3506 if (err) {
3507 error_report_err(err);
3508 }
3509 }
3510 }
3511}
3512
3513static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3514{
3515 MigrationState *s = data;
3516 VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3517 virtio_net_handle_migration_primary(n, s);
3518}
3519
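/*
 * DeviceListener hook: decide whether a device with a matching
 * failover_pair_id (the failover primary) should be hidden from the
 * guest until the standby virtio-net device negotiates the feature.
 */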
b91ad981 3520static bool failover_hide_primary_device(DeviceListener *listener,
f3558b1b
KW
3521 const QDict *device_opts,
3522 bool from_json,
3523 Error **errp)
9711cd0d
JF
3524{
3525 VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
4f0303ae 3526 const char *standby_id;
9711cd0d 3527
4d0e59ac 3528 if (!device_opts) {
89631fed 3529 return false;
4d0e59ac 3530 }
bcfc906b
LV
3531
3532 if (!qdict_haskey(device_opts, "failover_pair_id")) {
3533 return false;
3534 }
3535
3536 if (!qdict_haskey(device_opts, "id")) {
3537 error_setg(errp, "Device with failover_pair_id needs to have id");
3538 return false;
3539 }
3540
3541 standby_id = qdict_get_str(device_opts, "failover_pair_id");
89631fed
JQ
3542 if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3543 return false;
9711cd0d
JF
3544 }
3545
7fe7791e
LV
3546 /*
3547 * The hide helper can be called several times for a given device.
 3548 * Check that there is only one primary per virtio-net device, but
 3549 * don't duplicate the qdict when the helper is called again for the
 3550 * same device.
3551 */
259a10db 3552 if (n->primary_opts) {
7fe7791e
LV
3553 const char *old, *new;
3554 /* devices with failover_pair_id always have an id */
3555 old = qdict_get_str(n->primary_opts, "id");
3556 new = qdict_get_str(device_opts, "id");
3557 if (strcmp(old, new) != 0) {
3558 error_setg(errp, "Cannot attach more than one primary device to "
3559 "'%s': '%s' and '%s'", n->netclient_name, old, new);
3560 return false;
3561 }
3562 } else {
3563 n->primary_opts = qdict_clone_shallow(device_opts);
3564 n->primary_opts_from_json = from_json;
259a10db
KW
3565 }
3566
e2bde83e 3567 /* failover_primary_hidden is set during feature negotiation */
3abad4a2 3568 return qatomic_read(&n->failover_primary_hidden);
9711cd0d
JF
3569}
3570
e6f746b3 3571static void virtio_net_device_realize(DeviceState *dev, Error **errp)
17ec5a86 3572{
e6f746b3 3573 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
284a32f0 3574 VirtIONet *n = VIRTIO_NET(dev);
b1be4280 3575 NetClientState *nc;
284a32f0 3576 int i;
1773d9ee 3577
a93e599d 3578 if (n->net_conf.mtu) {
127833ee 3579 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
a93e599d
MC
3580 }
3581
9473939e
JB
3582 if (n->net_conf.duplex_str) {
3583 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3584 n->net_conf.duplex = DUPLEX_HALF;
3585 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3586 n->net_conf.duplex = DUPLEX_FULL;
3587 } else {
3588 error_setg(errp, "'duplex' must be 'half' or 'full'");
843c4cfc 3589 return;
9473939e
JB
3590 }
3591 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3592 } else {
3593 n->net_conf.duplex = DUPLEX_UNKNOWN;
3594 }
3595
3596 if (n->net_conf.speed < SPEED_UNKNOWN) {
3597 error_setg(errp, "'speed' must be between 0 and INT_MAX");
843c4cfc
MA
3598 return;
3599 }
3600 if (n->net_conf.speed >= 0) {
9473939e
JB
3601 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3602 }
3603
9711cd0d 3604 if (n->failover) {
b91ad981 3605 n->primary_listener.hide_device = failover_hide_primary_device;
e2bde83e 3606 qatomic_set(&n->failover_primary_hidden, true);
9711cd0d
JF
3607 device_listener_register(&n->primary_listener);
3608 n->migration_state.notify = virtio_net_migration_state_notifier;
3609 add_migration_state_change_notifier(&n->migration_state);
3610 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3611 }
3612
da3e8a23 3613 virtio_net_set_config_size(n, n->host_features);
3857cd5c 3614 virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
fbe78f4f 3615
1c0fbfa3
MT
3616 /*
 3617 * We keep the lower limit on the RX queue size at its historical value.
3618 * Guests that want a smaller ring can always resize it without
3619 * help from us (using virtio 1 and up).
3620 */
3621 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3622 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
5f997fd1 3623 !is_power_of_2(n->net_conf.rx_queue_size)) {
1c0fbfa3
MT
3624 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3625 "must be a power of 2 between %d and %d.",
3626 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3627 VIRTQUEUE_MAX_SIZE);
3628 virtio_cleanup(vdev);
3629 return;
3630 }
3631
9b02e161
WW
3632 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3633 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3634 !is_power_of_2(n->net_conf.tx_queue_size)) {
3635 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3636 "must be a power of 2 between %d and %d",
3637 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3638 VIRTQUEUE_MAX_SIZE);
3639 virtio_cleanup(vdev);
3640 return;
3641 }
3642
22288fe5
JW
3643 n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3644
3645 /*
 3646 * Figure out the number of datapath queue pairs, since the backend
 3647 * could provide the control queue via the peers as well.
3648 */
3649 if (n->nic_conf.peers.queues) {
3650 for (i = 0; i < n->max_ncs; i++) {
3651 if (n->nic_conf.peers.ncs[i]->is_datapath) {
3652 ++n->max_queue_pairs;
3653 }
3654 }
3655 }
3656 n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3657
441537f1 3658 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
22288fe5 3659 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
631b22ea 3660 "must be a positive integer less than %d.",
441537f1 3661 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
7e0e736e
JW
3662 virtio_cleanup(vdev);
3663 return;
3664 }
b21e2380 3665 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
441537f1 3666 n->curr_queue_pairs = 1;
1773d9ee 3667 n->tx_timeout = n->net_conf.txtimer;
a697a334 3668
1773d9ee
FK
3669 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3670 && strcmp(n->net_conf.tx, "bh")) {
0765691e
MA
3671 warn_report("virtio-net: "
3672 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3673 n->net_conf.tx);
3674 error_printf("Defaulting to \"bh\"");
a697a334
AW
3675 }
3676
2eef278b
MT
3677 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3678 n->net_conf.tx_queue_size);
9b02e161 3679
441537f1 3680 for (i = 0; i < n->max_queue_pairs; i++) {
f9d6dbf0 3681 virtio_net_add_queue(n, i);
a697a334 3682 }
da51a335 3683
17a0ca55 3684 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
1773d9ee
FK
3685 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3686 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
554c97dd 3687 n->status = VIRTIO_NET_S_LINK_UP;
9d8c6a25
DDAG
3688 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3689 QEMU_CLOCK_VIRTUAL,
3690 virtio_net_announce_timer, n);
b2c929f0 3691 n->announce_timer.round = 0;
fbe78f4f 3692
8a253ec2
FK
3693 if (n->netclient_type) {
3694 /*
 3695 * This happens when virtio_net_set_netclient_name() has been called.
3696 */
3697 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3698 n->netclient_type, n->netclient_name, n);
3699 } else {
3700 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
284a32f0 3701 object_get_typename(OBJECT(dev)), dev->id, n);
8a253ec2
FK
3702 }
3703
441537f1 3704 for (i = 0; i < n->max_queue_pairs; i++) {
d4c62930
BM
3705 n->nic->ncs[i].do_not_pad = true;
3706 }
3707
6e371ab8
MT
3708 peer_test_vnet_hdr(n);
3709 if (peer_has_vnet_hdr(n)) {
441537f1 3710 for (i = 0; i < n->max_queue_pairs; i++) {
d6085e3a 3711 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
fed699f9 3712 }
6e371ab8
MT
3713 n->host_hdr_len = sizeof(struct virtio_net_hdr);
3714 } else {
3715 n->host_hdr_len = 0;
3716 }
eb6b6c12 3717
1773d9ee 3718 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
96d5e201 3719
fed699f9 3720 n->vqs[0].tx_waiting = 0;
1773d9ee 3721 n->tx_burst = n->net_conf.txburst;
e22f0603 3722 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
002437cd 3723 n->promisc = 1; /* for compatibility */
fbe78f4f 3724
7267c094 3725 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
b6503ed9 3726
7267c094 3727 n->vlans = g_malloc0(MAX_VLAN >> 3);
f21c0ed9 3728
b1be4280
AK
3729 nc = qemu_get_queue(n->nic);
3730 nc->rxfilter_notify_enabled = 1;
3731
e87936ea
CL
3732 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3733 struct virtio_net_config netcfg = {};
3734 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3735 vhost_net_set_config(get_vhost_net(nc->peer),
f8ed3648 3736 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
e87936ea 3737 }
2974e916 3738 QTAILQ_INIT(&n->rsc_chains);
284a32f0 3739 n->qdev = dev;
4474e37a 3740
aac8f89d 3741 net_rx_pkt_init(&n->rx_pkt);
0145c393
AM
3742
3743 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3744 virtio_net_load_ebpf(n);
3745 }
17ec5a86
FK
3746}
3747
b69c3c21 3748static void virtio_net_device_unrealize(DeviceState *dev)
17ec5a86 3749{
306ec6c3
AF
3750 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3751 VirtIONet *n = VIRTIO_NET(dev);
441537f1 3752 int i, max_queue_pairs;
17ec5a86 3753
0145c393
AM
3754 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3755 virtio_net_unload_ebpf(n);
3756 }
3757
17ec5a86
FK
3758 /* This will stop vhost backend if appropriate. */
3759 virtio_net_set_status(vdev, 0);
3760
9e288406
MA
3761 g_free(n->netclient_name);
3762 n->netclient_name = NULL;
3763 g_free(n->netclient_type);
3764 n->netclient_type = NULL;
8a253ec2 3765
17ec5a86
FK
3766 g_free(n->mac_table.macs);
3767 g_free(n->vlans);
3768
9711cd0d 3769 if (n->failover) {
f3558b1b 3770 qobject_unref(n->primary_opts);
65018100 3771 device_listener_unregister(&n->primary_listener);
1e157667 3772 remove_migration_state_change_notifier(&n->migration_state);
f3558b1b
KW
3773 } else {
3774 assert(n->primary_opts == NULL);
9711cd0d
JF
3775 }
3776
441537f1
JW
3777 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3778 for (i = 0; i < max_queue_pairs; i++) {
f9d6dbf0 3779 virtio_net_del_queue(n, i);
17ec5a86 3780 }
d945d9f1 3781 /* also delete the control vq */
441537f1 3782 virtio_del_queue(vdev, max_queue_pairs * 2);
944458b6 3783 qemu_announce_timer_del(&n->announce_timer, false);
17ec5a86
FK
3784 g_free(n->vqs);
3785 qemu_del_nic(n->nic);
2974e916 3786 virtio_net_rsc_cleanup(n);
59079029 3787 g_free(n->rss_data.indirections_table);
4474e37a 3788 net_rx_pkt_uninit(n->rx_pkt);
6a1a8cc7 3789 virtio_cleanup(vdev);
17ec5a86
FK
3790}
3791
3792static void virtio_net_instance_init(Object *obj)
3793{
3794 VirtIONet *n = VIRTIO_NET(obj);
3795
3796 /*
3797 * The default config_size is sizeof(struct virtio_net_config).
 3798 * Can be overridden with virtio_net_set_config_size().
3799 */
3800 n->config_size = sizeof(struct virtio_net_config);
aa4197c3
GA
3801 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3802 "bootindex", "/ethernet-phy@0",
40c2281c 3803 DEVICE(n));
0145c393
AM
3804
3805 ebpf_rss_init(&n->ebpf_rss);
17ec5a86
FK
3806}
3807
44b1ff31 3808static int virtio_net_pre_save(void *opaque)
4d45dcfb
HP
3809{
3810 VirtIONet *n = opaque;
3811
 3812 /* At this point the backend must be stopped; otherwise
 3813 * it might keep writing to memory. */
3814 assert(!n->vhost_started);
44b1ff31
DDAG
3815
3816 return 0;
4d45dcfb
HP
3817}
3818
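/*
 * Return true if the failover primary device still has a pending
 * deleted event, i.e. its unplug has been requested but not yet
 * completed.
 */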
9711cd0d
JF
3819static bool primary_unplug_pending(void *opaque)
3820{
3821 DeviceState *dev = opaque;
21e8709b 3822 DeviceState *primary;
9711cd0d
JF
3823 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3824 VirtIONet *n = VIRTIO_NET(vdev);
3825
284f42a5
JF
3826 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3827 return false;
3828 }
21e8709b
JQ
3829 primary = failover_find_primary_device(n);
3830 return primary ? primary->pending_deleted_event : false;
9711cd0d
JF
3831}
3832
3833static bool dev_unplug_pending(void *opaque)
3834{
3835 DeviceState *dev = opaque;
3836 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3837
3838 return vdc->primary_unplug_pending(dev);
3839}
3840
c255488d
JP
3841static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3842{
3843 VirtIONet *n = VIRTIO_NET(vdev);
3844 NetClientState *nc = qemu_get_queue(n->nic);
3845 struct vhost_net *net = get_vhost_net(nc->peer);
3846 return &net->dev;
3847}
3848
4d45dcfb
HP
3849static const VMStateDescription vmstate_virtio_net = {
3850 .name = "virtio-net",
3851 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3852 .version_id = VIRTIO_NET_VM_VERSION,
3853 .fields = (VMStateField[]) {
3854 VMSTATE_VIRTIO_DEVICE,
3855 VMSTATE_END_OF_LIST()
3856 },
3857 .pre_save = virtio_net_pre_save,
9711cd0d 3858 .dev_unplug_pending = dev_unplug_pending,
4d45dcfb 3859};
290c2428 3860
17ec5a86 3861static Property virtio_net_properties[] = {
127833ee
JB
3862 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3863 VIRTIO_NET_F_CSUM, true),
3864 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
87108bb2 3865 VIRTIO_NET_F_GUEST_CSUM, true),
127833ee
JB
3866 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3867 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
87108bb2 3868 VIRTIO_NET_F_GUEST_TSO4, true),
127833ee 3869 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
87108bb2 3870 VIRTIO_NET_F_GUEST_TSO6, true),
127833ee 3871 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
87108bb2 3872 VIRTIO_NET_F_GUEST_ECN, true),
127833ee 3873 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
87108bb2 3874 VIRTIO_NET_F_GUEST_UFO, true),
127833ee 3875 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
87108bb2 3876 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
127833ee 3877 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
87108bb2 3878 VIRTIO_NET_F_HOST_TSO4, true),
127833ee 3879 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
87108bb2 3880 VIRTIO_NET_F_HOST_TSO6, true),
127833ee 3881 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
87108bb2 3882 VIRTIO_NET_F_HOST_ECN, true),
127833ee 3883 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
87108bb2 3884 VIRTIO_NET_F_HOST_UFO, true),
127833ee 3885 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
87108bb2 3886 VIRTIO_NET_F_MRG_RXBUF, true),
127833ee 3887 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
87108bb2 3888 VIRTIO_NET_F_STATUS, true),
127833ee 3889 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
87108bb2 3890 VIRTIO_NET_F_CTRL_VQ, true),
127833ee 3891 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
87108bb2 3892 VIRTIO_NET_F_CTRL_RX, true),
127833ee 3893 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
87108bb2 3894 VIRTIO_NET_F_CTRL_VLAN, true),
127833ee 3895 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
87108bb2 3896 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
127833ee 3897 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
87108bb2 3898 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
127833ee 3899 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
87108bb2 3900 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
127833ee 3901 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
59079029
YB
3902 DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3903 VIRTIO_NET_F_RSS, false),
e22f0603
YB
3904 DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3905 VIRTIO_NET_F_HASH_REPORT, false),
2974e916
YB
3906 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3907 VIRTIO_NET_F_RSC_EXT, false),
3908 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3909 VIRTIO_NET_RSC_DEFAULT_INTERVAL),
17ec5a86
FK
3910 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3911 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
87108bb2 3912 TX_TIMER_INTERVAL),
17ec5a86
FK
3913 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3914 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
1c0fbfa3
MT
3915 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3916 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
9b02e161
WW
3917 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3918 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
a93e599d 3919 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
75ebec11
MC
3920 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3921 true),
9473939e
JB
3922 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3923 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
9711cd0d 3924 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
17ec5a86
FK
3925 DEFINE_PROP_END_OF_LIST(),
3926};
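/*
 * Illustrative usage sketch (not part of this file): assuming the
 * virtio-net-pci frontend and a tap backend, the properties above can
 * be set on the QEMU command line, e.g.:
 *
 *   -netdev tap,id=net0,queues=4,vhost=on
 *   -device virtio-net-pci,netdev=net0,mq=on,rx_queue_size=1024,
 *           tx_queue_size=1024,speed=10000,duplex=full
 */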
3927
3928static void virtio_net_class_init(ObjectClass *klass, void *data)
3929{
3930 DeviceClass *dc = DEVICE_CLASS(klass);
3931 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
e6f746b3 3932
4f67d30b 3933 device_class_set_props(dc, virtio_net_properties);
290c2428 3934 dc->vmsd = &vmstate_virtio_net;
125ee0ed 3935 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
e6f746b3 3936 vdc->realize = virtio_net_device_realize;
306ec6c3 3937 vdc->unrealize = virtio_net_device_unrealize;
17ec5a86
FK
3938 vdc->get_config = virtio_net_get_config;
3939 vdc->set_config = virtio_net_set_config;
3940 vdc->get_features = virtio_net_get_features;
3941 vdc->set_features = virtio_net_set_features;
3942 vdc->bad_features = virtio_net_bad_features;
3943 vdc->reset = virtio_net_reset;
7dc6be52 3944 vdc->queue_reset = virtio_net_queue_reset;
7f863302 3945 vdc->queue_enable = virtio_net_queue_enable;
17ec5a86
FK
3946 vdc->set_status = virtio_net_set_status;
3947 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3948 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
2a083ffd 3949 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
7788c3f2 3950 vdc->post_load = virtio_net_post_load_virtio;
982b78c5 3951 vdc->vmsd = &vmstate_virtio_net_device;
9711cd0d 3952 vdc->primary_unplug_pending = primary_unplug_pending;
c255488d 3953 vdc->get_vhost = virtio_net_get_vhost;
17ec5a86
FK
3954}
3955
3956static const TypeInfo virtio_net_info = {
3957 .name = TYPE_VIRTIO_NET,
3958 .parent = TYPE_VIRTIO_DEVICE,
3959 .instance_size = sizeof(VirtIONet),
3960 .instance_init = virtio_net_instance_init,
3961 .class_init = virtio_net_class_init,
3962};
3963
3964static void virtio_register_types(void)
3965{
3966 type_register_static(&virtio_net_info);
3967}
3968
3969type_init(virtio_register_types)