/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "sysemu/qtest.h"

#define VIRTIO_NET_VM_VERSION    11

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval. This value affects performance
   a lot and should be tuned carefully; '300000' (300us) is the recommended
   value to pass the WHQL test, while '50000' can gain 2x netperf throughput
   with tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

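/*
 * Each queue pair owns two consecutive virtqueue indices (RX, then TX), with
 * the control virtqueue, if any, placed after all data queues; vq2q() below
 * maps a virtqueue index back to its queue-pair index.
 */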
static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}

static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

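/*
 * Build the virtio-net config space from the device model state. For a
 * vhost-vdpa peer the config is additionally fetched from the backend, and a
 * zero MAC reported by the backend is replaced with the MAC configured on the
 * QEMU command line.
 */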
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address. As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

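/*
 * Start or stop the vhost backend so that it matches the virtio status and
 * peer link state: outstanding packets are purged and the negotiated MTU is
 * pushed to the backend before vhost_net_start(); on failure the device falls
 * back to userspace virtio.
 */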
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

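/*
 * Propagate a status change to every queue pair: flush packets for queues
 * that just started and stop or reschedule any pending TX work (timer or
 * bottom half) for queues that stopped.
 */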
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting, we likely have some packets in the tx
                 * queue and notification is disabled */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

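/*
 * Build the RxFilterInfo reported through QMP query-rx-filter from the
 * current promiscuous/unicast/multicast state, MAC table and VLAN table;
 * rx-filter change notifications are re-enabled once the query completes.
 */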
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue: %d, "
                         "when resetting the queue", queue_index);
        }
    }
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static int peer_has_uso(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    return qemu_has_uso(qemu_get_queue(n->nic)->peer);
}

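/*
 * Select the guest header layout: virtio 1.0 devices use the mergeable
 * header (or the hash-report variant when hash reporting is negotiated),
 * legacy devices fall back to the plain virtio_net_hdr; the peer's vnet
 * header length is updated accordingly for each queue pair.
 */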
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    };
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

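/*
 * Feature negotiation: start from the device's host_features and mask out
 * offloads the peer cannot provide (no vnet headers, no UFO/USO), then let
 * the vhost backend and eBPF RSS availability further restrict the
 * advertised set.
 */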
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!peer_has_uso(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated, the feature bit could be set without
     * it being enabled in the backend. This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could refuse
     * to start.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping the guest announce its new location with vDPA devices that do
     * not support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
        (1ULL << VIRTIO_NET_F_GUEST_USO4) |
        (1ULL << VIRTIO_NET_F_GUEST_USO6);

    return guest_offloads_mask & features;
}

uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device being walked
 * @opaque: FailoverDevice to fill in
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

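/*
 * Called when the guest acks its feature set: configure multiqueue, header
 * layout, RSC/RSS state and guest offloads accordingly, ack the features to
 * any vhost backend, and kick off failover primary hotplug when
 * VIRTIO_NET_F_STANDBY was negotiated.
 */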
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * keep acked_features in NetVhostUserState up-to-date so it
         * can't miss any features configured by guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }

    if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

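/*
 * VIRTIO_NET_CTRL_MAC handler: either set the primary MAC address or replace
 * the unicast/multicast filter tables from the control-queue buffer, flagging
 * overflow when the guest supplies more entries than MAC_TABLE_ENTRIES.
 */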
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_detach_epbf_rss(VirtIONet *n);

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_epbf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}

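/*
 * Parse an RSS (do_rss == true) or hash-report configuration from the control
 * queue: validate the indirection table, default queue and key, then enable
 * eBPF RSS or fall back to software RSS. Returns the number of queue pairs,
 * or 0 on error.
 */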
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;

    if (!n->rss_data.populate_hash) {
        if (!virtio_net_attach_epbf_rss(n)) {
            /* EBPF must be loaded for vhost */
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
                goto error;
            }
            /* fallback to software RSS */
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    } else {
        /* use software RSS for hash populating */
        /* and detach eBPF if was loaded before */
        virtio_net_detach_epbf_rss(n);
        n->rss_data.enabled_software_rss = true;
    }

    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

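/*
 * VIRTIO_NET_CTRL_MQ handler: dispatch hash-report/RSS configuration or a
 * plain VQ_PAIRS_SET, then apply the new curr_queue_pairs (skipping the
 * backend update for vhost-vdpa peers).
 */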
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

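/*
 * Core control-queue dispatcher: read the virtio_net_ctrl_hdr from the out
 * iovec, route the command to the class-specific handler and write the
 * one-byte ack status back into the in iovec.
 */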
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

1694static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1695 const void *buf, size_t size)
fbe78f4f 1696{
3a330134 1697 if (n->has_vnet_hdr) {
22cc84db
MT
1698 /* FIXME this cast is evil */
1699 void *wbuf = (void *)buf;
280598b7
MT
1700 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1701 size - n->host_hdr_len);
1bfa316c
GK
1702
1703 if (n->needs_vnet_hdr_swap) {
1704 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1705 }
280598b7 1706 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
22cc84db
MT
1707 } else {
1708 struct virtio_net_hdr hdr = {
1709 .flags = 0,
1710 .gso_type = VIRTIO_NET_HDR_GSO_NONE
1711 };
1712 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
3a330134 1713 }
fbe78f4f
AL
1714}
1715
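/* Returns 1 when the packet should be delivered to the guest, 0 when it is
 * dropped by the VLAN/MAC/receive-mode filtering below. */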
3831ab20
AL
1716static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1717{
1718 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
f21c0ed9 1719 static const uint8_t vlan[] = {0x81, 0x00};
3831ab20 1720 uint8_t *ptr = (uint8_t *)buf;
b6503ed9 1721 int i;
3831ab20
AL
1722
1723 if (n->promisc)
1724 return 1;
1725
e043ebc6 1726 ptr += n->host_hdr_len;
3a330134 1727
f21c0ed9 1728 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
7542d3e7 1729 int vid = lduw_be_p(ptr + 14) & 0xfff;
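        /* n->vlans[] is a 4096-bit VLAN bitmap stored as uint32 words:
         * word index vid >> 5, bit index vid & 0x1f. */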
f21c0ed9
AL
1730 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1731 return 0;
1732 }
1733
bbe2f399
AW
1734 if (ptr[0] & 1) { // multicast
1735 if (!memcmp(ptr, bcast, sizeof(bcast))) {
015cb166
AW
1736 return !n->nobcast;
1737 } else if (n->nomulti) {
1738 return 0;
8fd2a2f1 1739 } else if (n->allmulti || n->mac_table.multi_overflow) {
bbe2f399
AW
1740 return 1;
1741 }
2d9aba39
AW
1742
1743 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1744 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1745 return 1;
1746 }
1747 }
bbe2f399 1748 } else { // unicast
015cb166
AW
1749 if (n->nouni) {
1750 return 0;
1751 } else if (n->alluni || n->mac_table.uni_overflow) {
8fd2a2f1
AW
1752 return 1;
1753 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
bbe2f399
AW
1754 return 1;
1755 }
3831ab20 1756
2d9aba39
AW
1757 for (i = 0; i < n->mac_table.first_multi; i++) {
1758 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1759 return 1;
1760 }
1761 }
b6503ed9
AL
1762 }
1763
3831ab20
AL
1764 return 0;
1765}
1766
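/* Maps the parsed L3/L4 protocol info to one of the NetPktRss* hash types
 * enabled in 'types'; returns 0xff when none of the enabled types applies. */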
69ff5ef8
AO
1767static uint8_t virtio_net_get_hash_type(bool hasip4,
1768 bool hasip6,
65f474bb 1769 EthL4HdrProto l4hdr_proto,
4474e37a
YB
1770 uint32_t types)
1771{
69ff5ef8 1772 if (hasip4) {
65f474bb
AO
1773 switch (l4hdr_proto) {
1774 case ETH_L4_HDR_PROTO_TCP:
1775 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1776 return NetPktRssIpV4Tcp;
1777 }
1778 break;
1779
1780 case ETH_L4_HDR_PROTO_UDP:
1781 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1782 return NetPktRssIpV4Udp;
1783 }
1784 break;
1785
1786 default:
1787 break;
4474e37a 1788 }
65f474bb 1789
4474e37a
YB
1790 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1791 return NetPktRssIpV4;
1792 }
69ff5ef8 1793 } else if (hasip6) {
65f474bb
AO
1794 switch (l4hdr_proto) {
1795 case ETH_L4_HDR_PROTO_TCP:
1796 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1797 return NetPktRssIpV6TcpEx;
1798 }
1799 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1800 return NetPktRssIpV6Tcp;
1801 }
1802 break;
4474e37a 1803
65f474bb
AO
1804 case ETH_L4_HDR_PROTO_UDP:
1805 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1806 return NetPktRssIpV6UdpEx;
1807 }
1808 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1809 return NetPktRssIpV6Udp;
1810 }
1811 break;
1812
1813 default:
1814 break;
4474e37a 1815 }
65f474bb
AO
1816
1817 if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1818 return NetPktRssIpV6Ex;
4474e37a 1819 }
65f474bb
AO
1820 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1821 return NetPktRssIpV6;
4474e37a
YB
1822 }
1823 }
1824 return 0xff;
1825}
1826
e22f0603
YB
1827static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1828 uint32_t hash)
1829{
1830 struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1831 hdr->hash_value = hash;
1832 hdr->hash_report = report;
1833}
1834
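/* Computes the RSS hash for 'buf' (optionally writing it into the virtio-net
 * header) and returns the queue index the packet should be steered to, or -1
 * when it should stay on the queue it arrived on. */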
4474e37a
YB
1835static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1836 size_t size)
1837{
1838 VirtIONet *n = qemu_get_nic_opaque(nc);
e22f0603 1839 unsigned int index = nc->queue_index, new_index = index;
4474e37a
YB
1840 struct NetRxPkt *pkt = n->rx_pkt;
1841 uint8_t net_hash_type;
1842 uint32_t hash;
65f474bb
AO
1843 bool hasip4, hasip6;
1844 EthL4HdrProto l4hdr_proto;
e22f0603
YB
1845 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1846 VIRTIO_NET_HASH_REPORT_IPv4,
1847 VIRTIO_NET_HASH_REPORT_TCPv4,
1848 VIRTIO_NET_HASH_REPORT_TCPv6,
1849 VIRTIO_NET_HASH_REPORT_IPv6,
1850 VIRTIO_NET_HASH_REPORT_IPv6_EX,
1851 VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1852 VIRTIO_NET_HASH_REPORT_UDPv4,
1853 VIRTIO_NET_HASH_REPORT_UDPv6,
1854 VIRTIO_NET_HASH_REPORT_UDPv6_EX
1855 };
2f0fa232
AO
1856 struct iovec iov = {
1857 .iov_base = (void *)buf,
1858 .iov_len = size
1859 };
4474e37a 1860
2f0fa232 1861 net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
65f474bb
AO
1862 net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1863 net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
4474e37a
YB
1864 n->rss_data.hash_types);
1865 if (net_hash_type > NetPktRssIpV6UdpEx) {
e22f0603
YB
1866 if (n->rss_data.populate_hash) {
1867 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1868 }
1869 return n->rss_data.redirect ? n->rss_data.default_queue : -1;
4474e37a
YB
1870 }
1871
1872 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
e22f0603
YB
1873
1874 if (n->rss_data.populate_hash) {
1875 virtio_set_packet_hash(buf, reports[net_hash_type], hash);
4474e37a 1876 }
e22f0603
YB
1877
1878 if (n->rss_data.redirect) {
1879 new_index = hash & (n->rss_data.indirections_len - 1);
1880 new_index = n->rss_data.indirections_table[new_index];
1881 }
1882
1883 return (index == new_index) ? -1 : new_index;
4474e37a
YB
1884}
1885
97cd965c 1886static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
4474e37a 1887 size_t size, bool no_rss)
fbe78f4f 1888{
cc1f0f45 1889 VirtIONet *n = qemu_get_nic_opaque(nc);
fed699f9 1890 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
17a0ca55 1891 VirtIODevice *vdev = VIRTIO_DEVICE(n);
bedd7e93
JW
1892 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1893 size_t lens[VIRTQUEUE_MAX_SIZE];
63c58728
MT
1894 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1895 struct virtio_net_hdr_mrg_rxbuf mhdr;
1896 unsigned mhdr_cnt = 0;
bedd7e93
JW
1897 size_t offset, i, guest_offset, j;
1898 ssize_t err;
fbe78f4f 1899
fed699f9 1900 if (!virtio_net_can_receive(nc)) {
cdd5cc12 1901 return -1;
b356f76d 1902 }
cdd5cc12 1903
0145c393 1904 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
4474e37a
YB
1905 int index = virtio_net_process_rss(nc, buf, size);
1906 if (index >= 0) {
1907 NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1908 return virtio_net_receive_rcu(nc2, buf, size, true);
1909 }
1910 }
1911
940cda94 1912 /* hdr_len refers to the header we supply to the guest */
0c87e93e 1913 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
8aeff62d 1914 return 0;
0c87e93e 1915 }
fbe78f4f 1916
3831ab20 1917 if (!receive_filter(n, buf, size))
4f1c942b 1918 return size;
3831ab20 1919
fbe78f4f
AL
1920 offset = i = 0;
1921
1922 while (offset < size) {
51b19ebe 1923 VirtQueueElement *elem;
fbe78f4f 1924 int len, total;
51b19ebe 1925 const struct iovec *sg;
fbe78f4f 1926
22c253d9 1927 total = 0;
fbe78f4f 1928
bedd7e93
JW
1929 if (i == VIRTQUEUE_MAX_SIZE) {
1930 virtio_error(vdev, "virtio-net unexpected long buffer chain");
1931 err = size;
1932 goto err;
1933 }
1934
51b19ebe
PB
1935 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1936 if (!elem) {
ba10b9c0
GK
1937 if (i) {
1938 virtio_error(vdev, "virtio-net unexpected empty queue: "
1939 "i %zd mergeable %d offset %zd, size %zd, "
1940 "guest hdr len %zd, host hdr len %zd "
1941 "guest features 0x%" PRIx64,
1942 i, n->mergeable_rx_bufs, offset, size,
1943 n->guest_hdr_len, n->host_hdr_len,
1944 vdev->guest_features);
1945 }
bedd7e93
JW
1946 err = -1;
1947 goto err;
fbe78f4f
AL
1948 }
1949
51b19ebe 1950 if (elem->in_num < 1) {
ba10b9c0
GK
1951 virtio_error(vdev,
1952 "virtio-net receive queue contains no in buffers");
1953 virtqueue_detach_element(q->rx_vq, elem, 0);
1954 g_free(elem);
bedd7e93
JW
1955 err = -1;
1956 goto err;
fbe78f4f
AL
1957 }
1958
51b19ebe 1959 sg = elem->in_sg;
fbe78f4f 1960 if (i == 0) {
c8d28e7e 1961 assert(offset == 0);
63c58728
MT
1962 if (n->mergeable_rx_bufs) {
1963 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
51b19ebe 1964 sg, elem->in_num,
63c58728
MT
1965 offsetof(typeof(mhdr), num_buffers),
1966 sizeof(mhdr.num_buffers));
1967 }
fbe78f4f 1968
51b19ebe 1969 receive_header(n, sg, elem->in_num, buf, size);
e22f0603
YB
1970 if (n->rss_data.populate_hash) {
1971 offset = sizeof(mhdr);
1972 iov_from_buf(sg, elem->in_num, offset,
1973 buf + offset, n->host_hdr_len - sizeof(mhdr));
1974 }
c8d28e7e 1975 offset = n->host_hdr_len;
e35e23f6 1976 total += n->guest_hdr_len;
22cc84db
MT
1977 guest_offset = n->guest_hdr_len;
1978 } else {
1979 guest_offset = 0;
fbe78f4f
AL
1980 }
1981
1982 /* copy in packet. ugh */
51b19ebe 1983 len = iov_from_buf(sg, elem->in_num, guest_offset,
dcf6f5e1 1984 buf + offset, size - offset);
fbe78f4f 1985 total += len;
279a4253
MT
1986 offset += len;
1987 /* If buffers can't be merged, at this point we
1988 * must have consumed the complete packet.
1989 * Otherwise, drop it. */
1990 if (!n->mergeable_rx_bufs && offset < size) {
27e57efe 1991 virtqueue_unpop(q->rx_vq, elem, total);
51b19ebe 1992 g_free(elem);
bedd7e93
JW
1993 err = size;
1994 goto err;
279a4253 1995 }
fbe78f4f 1996
bedd7e93
JW
1997 elems[i] = elem;
1998 lens[i] = total;
1999 i++;
fbe78f4f
AL
2000 }
2001
63c58728 2002 if (mhdr_cnt) {
1399c60d 2003 virtio_stw_p(vdev, &mhdr.num_buffers, i);
63c58728
MT
2004 iov_from_buf(mhdr_sg, mhdr_cnt,
2005 0,
2006 &mhdr.num_buffers, sizeof mhdr.num_buffers);
44b15bc5 2007 }
fbe78f4f 2008
bedd7e93
JW
2009 for (j = 0; j < i; j++) {
2010 /* signal other side */
2011 virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
2012 g_free(elems[j]);
2013 }
2014
0c87e93e 2015 virtqueue_flush(q->rx_vq, i);
17a0ca55 2016 virtio_notify(vdev, q->rx_vq);
4f1c942b
MM
2017
2018 return size;
bedd7e93
JW
2019
2020err:
2021 for (j = 0; j < i; j++) {
abe300d9 2022 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
bedd7e93
JW
2023 g_free(elems[j]);
2024 }
2025
2026 return err;
fbe78f4f
AL
2027}
2028
2974e916 2029static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
97cd965c
PB
2030 size_t size)
2031{
068ddfa9 2032 RCU_READ_LOCK_GUARD();
97cd965c 2033
4474e37a 2034 return virtio_net_receive_rcu(nc, buf, size, false);
97cd965c
PB
2035}
2036
2974e916
YB
2037static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2038 const uint8_t *buf,
2039 VirtioNetRscUnit *unit)
2040{
2041 uint16_t ip_hdrlen;
2042 struct ip_header *ip;
2043
2044 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2045 + sizeof(struct eth_header));
2046 unit->ip = (void *)ip;
2047 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2048 unit->ip_plen = &ip->ip_len;
2049 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
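    /* The TCP data offset sits in the top 4 bits of th_offset_flags and is
     * counted in 32-bit words; masking with 0xF000 and shifting right by 10
     * (>> 12 for the field, << 2 for words-to-bytes) gives the header length
     * in bytes. */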
2050 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2051 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
2052}
2053
2054static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2055 const uint8_t *buf,
2056 VirtioNetRscUnit *unit)
2057{
2058 struct ip6_header *ip6;
2059
2060 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2061 + sizeof(struct eth_header));
2062 unit->ip = ip6;
2063 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
78ee6bd0 2064 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2974e916
YB
2065 + sizeof(struct ip6_header));
2066 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2067
2431f4f1 2068 /* The payload length field differs between IPv4 and IPv6:
2974e916
YB
 2069 the IP header is not included in the IPv6 payload length */
2070 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2071}
2072
2073static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2074 VirtioNetRscSeg *seg)
2075{
2076 int ret;
dd3d85e8 2077 struct virtio_net_hdr_v1 *h;
2974e916 2078
dd3d85e8 2079 h = (struct virtio_net_hdr_v1 *)seg->buf;
2974e916
YB
2080 h->flags = 0;
2081 h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2082
2083 if (seg->is_coalesced) {
dd3d85e8
YB
2084 h->rsc.segments = seg->packets;
2085 h->rsc.dup_acks = seg->dup_ack;
2974e916
YB
2086 h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2087 if (chain->proto == ETH_P_IP) {
2088 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2089 } else {
2090 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2091 }
2092 }
2093
2094 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2095 QTAILQ_REMOVE(&chain->buffers, seg, next);
2096 g_free(seg->buf);
2097 g_free(seg);
2098
2099 return ret;
2100}
2101
2102static void virtio_net_rsc_purge(void *opq)
2103{
2104 VirtioNetRscSeg *seg, *rn;
2105 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2106
2107 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2108 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2109 chain->stat.purge_failed++;
2110 continue;
2111 }
2112 }
2113
2114 chain->stat.timer++;
2115 if (!QTAILQ_EMPTY(&chain->buffers)) {
2116 timer_mod(chain->drain_timer,
2117 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2118 }
2119}
2120
2121static void virtio_net_rsc_cleanup(VirtIONet *n)
2122{
2123 VirtioNetRscChain *chain, *rn_chain;
2124 VirtioNetRscSeg *seg, *rn_seg;
2125
2126 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2127 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2128 QTAILQ_REMOVE(&chain->buffers, seg, next);
2129 g_free(seg->buf);
2130 g_free(seg);
2131 }
2132
2974e916
YB
2133 timer_free(chain->drain_timer);
2134 QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2135 g_free(chain);
2136 }
2137}
2138
2139static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2140 NetClientState *nc,
2141 const uint8_t *buf, size_t size)
2142{
2143 uint16_t hdr_len;
2144 VirtioNetRscSeg *seg;
2145
2146 hdr_len = chain->n->guest_hdr_len;
b21e2380 2147 seg = g_new(VirtioNetRscSeg, 1);
2974e916
YB
2148 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2149 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2150 memcpy(seg->buf, buf, size);
2151 seg->size = size;
2152 seg->packets = 1;
2153 seg->dup_ack = 0;
2154 seg->is_coalesced = 0;
2155 seg->nc = nc;
2156
2157 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2158 chain->stat.cache++;
2159
2160 switch (chain->proto) {
2161 case ETH_P_IP:
2162 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2163 break;
2164 case ETH_P_IPV6:
2165 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2166 break;
2167 default:
2168 g_assert_not_reached();
2169 }
2170}
2171
2172static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2173 VirtioNetRscSeg *seg,
2174 const uint8_t *buf,
2175 struct tcp_header *n_tcp,
2176 struct tcp_header *o_tcp)
2177{
2178 uint32_t nack, oack;
2179 uint16_t nwin, owin;
2180
2181 nack = htonl(n_tcp->th_ack);
2182 nwin = htons(n_tcp->th_win);
2183 oack = htonl(o_tcp->th_ack);
2184 owin = htons(o_tcp->th_win);
2185
2186 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2187 chain->stat.ack_out_of_win++;
2188 return RSC_FINAL;
2189 } else if (nack == oack) {
2190 /* duplicated ack or window probe */
2191 if (nwin == owin) {
 2192 /* duplicated ack; bump the dup ack count (the WHQL test expects up to 1) */
2193 chain->stat.dup_ack++;
2194 return RSC_FINAL;
2195 } else {
2196 /* Coalesce window update */
2197 o_tcp->th_win = n_tcp->th_win;
2198 chain->stat.win_update++;
2199 return RSC_COALESCE;
2200 }
2201 } else {
 2202 /* pure ack, go to 'C', finalize */
2203 chain->stat.pure_ack++;
2204 return RSC_FINAL;
2205 }
2206}
2207
2208static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2209 VirtioNetRscSeg *seg,
2210 const uint8_t *buf,
2211 VirtioNetRscUnit *n_unit)
2212{
2213 void *data;
2214 uint16_t o_ip_len;
2215 uint32_t nseq, oseq;
2216 VirtioNetRscUnit *o_unit;
2217
2218 o_unit = &seg->unit;
2219 o_ip_len = htons(*o_unit->ip_plen);
2220 nseq = htonl(n_unit->tcp->th_seq);
2221 oseq = htonl(o_unit->tcp->th_seq);
2222
2223 /* out of order or retransmitted. */
2224 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2225 chain->stat.data_out_of_win++;
2226 return RSC_FINAL;
2227 }
2228
2229 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2230 if (nseq == oseq) {
2231 if ((o_unit->payload == 0) && n_unit->payload) {
 2232 /* From no payload to payload: the normal case, not a dup ack etc. */
2233 chain->stat.data_after_pure_ack++;
2234 goto coalesce;
2235 } else {
2236 return virtio_net_rsc_handle_ack(chain, seg, buf,
2237 n_unit->tcp, o_unit->tcp);
2238 }
2239 } else if ((nseq - oseq) != o_unit->payload) {
2240 /* Not a consistent packet, out of order */
2241 chain->stat.data_out_of_order++;
2242 return RSC_FINAL;
2243 } else {
2244coalesce:
2245 if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2246 chain->stat.over_size++;
2247 return RSC_FINAL;
2248 }
2249
 2250 /* The data is in sequence; the payload length field differs between v4
 2251 and v6, so use the field value to update and record the new data length */
2252 o_unit->payload += n_unit->payload; /* update new data len */
2253
2254 /* update field in ip header */
2255 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2256
 2257 /* Bring the 'PUSH' bit along: the WHQL test guide says 'PUSH' can be
 2258 coalesced for Windows guests, while this may change the behavior for a
 2259 Linux guest (only if it uses the RSC feature). */
2260 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2261
2262 o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2263 o_unit->tcp->th_win = n_unit->tcp->th_win;
2264
2265 memmove(seg->buf + seg->size, data, n_unit->payload);
2266 seg->size += n_unit->payload;
2267 seg->packets++;
2268 chain->stat.coalesced++;
2269 return RSC_COALESCE;
2270 }
2271}
2272
2273static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2274 VirtioNetRscSeg *seg,
2275 const uint8_t *buf, size_t size,
2276 VirtioNetRscUnit *unit)
2277{
2278 struct ip_header *ip1, *ip2;
2279
2280 ip1 = (struct ip_header *)(unit->ip);
2281 ip2 = (struct ip_header *)(seg->unit.ip);
2282 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2283 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2284 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2285 chain->stat.no_match++;
2286 return RSC_NO_MATCH;
2287 }
2288
2289 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2290}
2291
2292static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2293 VirtioNetRscSeg *seg,
2294 const uint8_t *buf, size_t size,
2295 VirtioNetRscUnit *unit)
2296{
2297 struct ip6_header *ip1, *ip2;
2298
2299 ip1 = (struct ip6_header *)(unit->ip);
2300 ip2 = (struct ip6_header *)(seg->unit.ip);
2301 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2302 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2303 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2304 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2305 chain->stat.no_match++;
2306 return RSC_NO_MATCH;
2307 }
2308
2309 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2310}
2311
2312/* Packets with 'SYN' should bypass; packets with other control flags should
 2313 * only be sent after the chain is drained, to prevent out-of-order delivery */
2314static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2315 struct tcp_header *tcp)
2316{
2317 uint16_t tcp_hdr;
2318 uint16_t tcp_flag;
2319
2320 tcp_flag = htons(tcp->th_offset_flags);
2321 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2322 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2974e916
YB
2323 if (tcp_flag & TH_SYN) {
2324 chain->stat.tcp_syn++;
2325 return RSC_BYPASS;
2326 }
2327
2328 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2329 chain->stat.tcp_ctrl_drain++;
2330 return RSC_FINAL;
2331 }
2332
2333 if (tcp_hdr > sizeof(struct tcp_header)) {
2334 chain->stat.tcp_all_opt++;
2335 return RSC_FINAL;
2336 }
2337
2338 return RSC_CANDIDATE;
2339}
2340
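/* Tries to merge 'buf' into an in-flight segment of the chain: caches it when
 * the chain is empty, finalizes and forwards when it cannot be coalesced, and
 * otherwise extends the matching segment and reports the full size back. */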
2341static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2342 NetClientState *nc,
2343 const uint8_t *buf, size_t size,
2344 VirtioNetRscUnit *unit)
2345{
2346 int ret;
2347 VirtioNetRscSeg *seg, *nseg;
2348
2349 if (QTAILQ_EMPTY(&chain->buffers)) {
2350 chain->stat.empty_cache++;
2351 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2352 timer_mod(chain->drain_timer,
2353 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2354 return size;
2355 }
2356
2357 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2358 if (chain->proto == ETH_P_IP) {
2359 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2360 } else {
2361 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2362 }
2363
2364 if (ret == RSC_FINAL) {
2365 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2366 /* Send failed */
2367 chain->stat.final_failed++;
2368 return 0;
2369 }
2370
2371 /* Send current packet */
2372 return virtio_net_do_receive(nc, buf, size);
2373 } else if (ret == RSC_NO_MATCH) {
2374 continue;
2375 } else {
 2376 /* Coalesced; set the coalesced flag so the checksum is recalculated for ipv4 */
2377 seg->is_coalesced = 1;
2378 return size;
2379 }
2380 }
2381
2382 chain->stat.no_match_cache++;
2383 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2384 return size;
2385}
2386
2387/* Drain a connection's buffered data; this avoids out-of-order segments */
2388static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2389 NetClientState *nc,
2390 const uint8_t *buf, size_t size,
2391 uint16_t ip_start, uint16_t ip_size,
2392 uint16_t tcp_port)
2393{
2394 VirtioNetRscSeg *seg, *nseg;
2395 uint32_t ppair1, ppair2;
2396
2397 ppair1 = *(uint32_t *)(buf + tcp_port);
2398 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2399 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2400 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2401 || (ppair1 != ppair2)) {
2402 continue;
2403 }
2404 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2405 chain->stat.drain_failed++;
2406 }
2407
2408 break;
2409 }
2410
2411 return virtio_net_do_receive(nc, buf, size);
2412}
2413
2414static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2415 struct ip_header *ip,
2416 const uint8_t *buf, size_t size)
2417{
2418 uint16_t ip_len;
2419
2420 /* Not an ipv4 packet */
2421 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2422 chain->stat.ip_option++;
2423 return RSC_BYPASS;
2424 }
2425
2426 /* Don't handle packets with ip option */
2427 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2428 chain->stat.ip_option++;
2429 return RSC_BYPASS;
2430 }
2431
2432 if (ip->ip_p != IPPROTO_TCP) {
2433 chain->stat.bypass_not_tcp++;
2434 return RSC_BYPASS;
2435 }
2436
2437 /* Don't handle packets with ip fragment */
2438 if (!(htons(ip->ip_off) & IP_DF)) {
2439 chain->stat.ip_frag++;
2440 return RSC_BYPASS;
2441 }
2442
2443 /* Don't handle packets with ecn flag */
2444 if (IPTOS_ECN(ip->ip_tos)) {
2445 chain->stat.ip_ecn++;
2446 return RSC_BYPASS;
2447 }
2448
2449 ip_len = htons(ip->ip_len);
2450 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2451 || ip_len > (size - chain->n->guest_hdr_len -
2452 sizeof(struct eth_header))) {
2453 chain->stat.ip_hacked++;
2454 return RSC_BYPASS;
2455 }
2456
2457 return RSC_CANDIDATE;
2458}
2459
2460static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2461 NetClientState *nc,
2462 const uint8_t *buf, size_t size)
2463{
2464 int32_t ret;
2465 uint16_t hdr_len;
2466 VirtioNetRscUnit unit;
2467
2468 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2469
2470 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2471 + sizeof(struct tcp_header))) {
2472 chain->stat.bypass_not_tcp++;
2473 return virtio_net_do_receive(nc, buf, size);
2474 }
2475
2476 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2477 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2478 != RSC_CANDIDATE) {
2479 return virtio_net_do_receive(nc, buf, size);
2480 }
2481
2482 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2483 if (ret == RSC_BYPASS) {
2484 return virtio_net_do_receive(nc, buf, size);
2485 } else if (ret == RSC_FINAL) {
2486 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2487 ((hdr_len + sizeof(struct eth_header)) + 12),
2488 VIRTIO_NET_IP4_ADDR_SIZE,
2489 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2490 }
2491
2492 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2493}
2494
2495static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2496 struct ip6_header *ip6,
2497 const uint8_t *buf, size_t size)
2498{
2499 uint16_t ip_len;
2500
2501 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2502 != IP_HEADER_VERSION_6) {
2503 return RSC_BYPASS;
2504 }
2505
 2506 /* Both options and the protocol are checked by this test */
2507 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2508 chain->stat.bypass_not_tcp++;
2509 return RSC_BYPASS;
2510 }
2511
2512 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2513 if (ip_len < sizeof(struct tcp_header) ||
2514 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2515 - sizeof(struct ip6_header))) {
2516 chain->stat.ip_hacked++;
2517 return RSC_BYPASS;
2518 }
2519
2520 /* Don't handle packets with ecn flag */
2521 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2522 chain->stat.ip_ecn++;
2523 return RSC_BYPASS;
2524 }
2525
2526 return RSC_CANDIDATE;
2527}
2528
2529static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2530 const uint8_t *buf, size_t size)
2531{
2532 int32_t ret;
2533 uint16_t hdr_len;
2534 VirtioNetRscChain *chain;
2535 VirtioNetRscUnit unit;
2536
3d558330 2537 chain = opq;
2974e916
YB
2538 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2539
2540 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2541 + sizeof(tcp_header))) {
2542 return virtio_net_do_receive(nc, buf, size);
2543 }
2544
2545 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2546 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2547 unit.ip, buf, size)) {
2548 return virtio_net_do_receive(nc, buf, size);
2549 }
2550
2551 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2552 if (ret == RSC_BYPASS) {
2553 return virtio_net_do_receive(nc, buf, size);
2554 } else if (ret == RSC_FINAL) {
2555 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2556 ((hdr_len + sizeof(struct eth_header)) + 8),
2557 VIRTIO_NET_IP6_ADDR_SIZE,
2558 hdr_len + sizeof(struct eth_header)
2559 + sizeof(struct ip6_header));
2560 }
2561
2562 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2563}
2564
2565static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2566 NetClientState *nc,
2567 uint16_t proto)
2568{
2569 VirtioNetRscChain *chain;
2570
2571 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2572 return NULL;
2573 }
2574
2575 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2576 if (chain->proto == proto) {
2577 return chain;
2578 }
2579 }
2580
2581 chain = g_malloc(sizeof(*chain));
2582 chain->n = n;
2583 chain->proto = proto;
2584 if (proto == (uint16_t)ETH_P_IP) {
2585 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2586 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2587 } else {
2588 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2589 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2590 }
2591 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2592 virtio_net_rsc_purge, chain);
2593 memset(&chain->stat, 0, sizeof(chain->stat));
2594
2595 QTAILQ_INIT(&chain->buffers);
2596 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2597
2598 return chain;
2599}
2600
2601static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2602 const uint8_t *buf,
2603 size_t size)
2604{
2605 uint16_t proto;
2606 VirtioNetRscChain *chain;
2607 struct eth_header *eth;
2608 VirtIONet *n;
2609
2610 n = qemu_get_nic_opaque(nc);
2611 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2612 return virtio_net_do_receive(nc, buf, size);
2613 }
2614
2615 eth = (struct eth_header *)(buf + n->guest_hdr_len);
2616 proto = htons(eth->h_proto);
2617
2618 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2619 if (chain) {
2620 chain->stat.received++;
2621 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2622 return virtio_net_rsc_receive4(chain, nc, buf, size);
2623 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2624 return virtio_net_rsc_receive6(chain, nc, buf, size);
2625 }
2626 }
2627 return virtio_net_do_receive(nc, buf, size);
2628}
2629
2630static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2631 size_t size)
2632{
2633 VirtIONet *n = qemu_get_nic_opaque(nc);
2634 if ((n->rsc4_enabled || n->rsc6_enabled)) {
2635 return virtio_net_rsc_receive(nc, buf, size);
2636 } else {
2637 return virtio_net_do_receive(nc, buf, size);
2638 }
2639}
2640
0c87e93e 2641static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
6243375f 2642
4e68f7a0 2643static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
6243375f 2644{
cc1f0f45 2645 VirtIONet *n = qemu_get_nic_opaque(nc);
fed699f9 2646 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
17a0ca55 2647 VirtIODevice *vdev = VIRTIO_DEVICE(n);
df8d0708 2648 int ret;
6243375f 2649
51b19ebe 2650 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
17a0ca55 2651 virtio_notify(vdev, q->tx_vq);
6243375f 2652
51b19ebe
PB
2653 g_free(q->async_tx.elem);
2654 q->async_tx.elem = NULL;
6243375f 2655
0c87e93e 2656 virtio_queue_set_notification(q->tx_vq, 1);
df8d0708 2657 ret = virtio_net_flush_tx(q);
7550a822 2658 if (ret >= n->tx_burst) {
df8d0708
LV
2659 /*
2660 * the flush has been stopped by tx_burst
2661 * we will not receive notification for the
 2662 * remaining part, so re-schedule
2663 */
2664 virtio_queue_set_notification(q->tx_vq, 0);
7550a822
LV
2665 if (q->tx_bh) {
2666 qemu_bh_schedule(q->tx_bh);
2667 } else {
2668 timer_mod(q->tx_timer,
2669 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2670 }
df8d0708
LV
2671 q->tx_waiting = 1;
2672 }
6243375f
MM
2673}
2674
fbe78f4f 2675/* TX */
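/* Flushes up to tx_burst packets from the TX virtqueue. Returns the number of
 * packets sent, -EBUSY when an asynchronous transmit is still in flight, or
 * -EINVAL when the device was marked broken because of a malformed header. */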
0c87e93e 2676static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
fbe78f4f 2677{
0c87e93e 2678 VirtIONet *n = q->n;
17a0ca55 2679 VirtIODevice *vdev = VIRTIO_DEVICE(n);
51b19ebe 2680 VirtQueueElement *elem;
e3f30488 2681 int32_t num_packets = 0;
fed699f9 2682 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
17a0ca55 2683 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
e3f30488
AW
2684 return num_packets;
2685 }
fbe78f4f 2686
51b19ebe 2687 if (q->async_tx.elem) {
0c87e93e 2688 virtio_queue_set_notification(q->tx_vq, 0);
e3f30488 2689 return num_packets;
6243375f
MM
2690 }
2691
51b19ebe 2692 for (;;) {
bd89dd98 2693 ssize_t ret;
51b19ebe
PB
2694 unsigned int out_num;
2695 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
feb93f36 2696 struct virtio_net_hdr_mrg_rxbuf mhdr;
fbe78f4f 2697
51b19ebe
PB
2698 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2699 if (!elem) {
2700 break;
2701 }
2702
2703 out_num = elem->out_num;
2704 out_sg = elem->out_sg;
7b80d08e 2705 if (out_num < 1) {
fa5e56c2
GK
2706 virtio_error(vdev, "virtio-net header not in first element");
2707 virtqueue_detach_element(q->tx_vq, elem, 0);
2708 g_free(elem);
2709 return -EINVAL;
fbe78f4f
AL
2710 }
2711
032a74a1 2712 if (n->has_vnet_hdr) {
feb93f36
JW
2713 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2714 n->guest_hdr_len) {
fa5e56c2
GK
2715 virtio_error(vdev, "virtio-net header incorrect");
2716 virtqueue_detach_element(q->tx_vq, elem, 0);
2717 g_free(elem);
2718 return -EINVAL;
032a74a1 2719 }
1bfa316c 2720 if (n->needs_vnet_hdr_swap) {
feb93f36
JW
2721 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2722 sg2[0].iov_base = &mhdr;
2723 sg2[0].iov_len = n->guest_hdr_len;
2724 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2725 out_sg, out_num,
2726 n->guest_hdr_len, -1);
2727 if (out_num == VIRTQUEUE_MAX_SIZE) {
2728 goto drop;
7d37435b 2729 }
feb93f36
JW
2730 out_num += 1;
2731 out_sg = sg2;
7d37435b 2732 }
032a74a1 2733 }
14761f9c
MT
2734 /*
2735 * If host wants to see the guest header as is, we can
2736 * pass it on unchanged. Otherwise, copy just the parts
2737 * that host is interested in.
2738 */
2739 assert(n->host_hdr_len <= n->guest_hdr_len);
2740 if (n->host_hdr_len != n->guest_hdr_len) {
2741 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2742 out_sg, out_num,
2743 0, n->host_hdr_len);
2744 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2745 out_sg, out_num,
2746 n->guest_hdr_len, -1);
2747 out_num = sg_num;
2748 out_sg = sg;
fbe78f4f
AL
2749 }
2750
fed699f9
JW
2751 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2752 out_sg, out_num, virtio_net_tx_complete);
6243375f 2753 if (ret == 0) {
0c87e93e
JW
2754 virtio_queue_set_notification(q->tx_vq, 0);
2755 q->async_tx.elem = elem;
e3f30488 2756 return -EBUSY;
6243375f
MM
2757 }
2758
feb93f36 2759drop:
51b19ebe 2760 virtqueue_push(q->tx_vq, elem, 0);
17a0ca55 2761 virtio_notify(vdev, q->tx_vq);
51b19ebe 2762 g_free(elem);
e3f30488
AW
2763
2764 if (++num_packets >= n->tx_burst) {
2765 break;
2766 }
fbe78f4f 2767 }
e3f30488 2768 return num_packets;
fbe78f4f
AL
2769}
2770
7550a822
LV
2771static void virtio_net_tx_timer(void *opaque);
2772
a697a334 2773static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
fbe78f4f 2774{
17a0ca55 2775 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2776 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
fbe78f4f 2777
283e2c2a
YB
2778 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2779 virtio_net_drop_tx_queue_data(vdev, vq);
2780 return;
2781 }
2782
783e7706 2783 /* This happens when device was stopped but VCPU wasn't. */
17a0ca55 2784 if (!vdev->vm_running) {
0c87e93e 2785 q->tx_waiting = 1;
783e7706
MT
2786 return;
2787 }
2788
0c87e93e 2789 if (q->tx_waiting) {
7550a822 2790 /* We already have queued packets, immediately flush */
bc72ad67 2791 timer_del(q->tx_timer);
7550a822 2792 virtio_net_tx_timer(q);
fbe78f4f 2793 } else {
7550a822 2794 /* re-arm timer to flush it (and more) on next tick */
bc72ad67 2795 timer_mod(q->tx_timer,
7550a822 2796 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
0c87e93e 2797 q->tx_waiting = 1;
fbe78f4f
AL
2798 virtio_queue_set_notification(vq, 0);
2799 }
2800}
2801
a697a334
AW
2802static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2803{
17a0ca55 2804 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2805 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
a697a334 2806
283e2c2a
YB
2807 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2808 virtio_net_drop_tx_queue_data(vdev, vq);
2809 return;
2810 }
2811
0c87e93e 2812 if (unlikely(q->tx_waiting)) {
a697a334
AW
2813 return;
2814 }
0c87e93e 2815 q->tx_waiting = 1;
783e7706 2816 /* This happens when device was stopped but VCPU wasn't. */
17a0ca55 2817 if (!vdev->vm_running) {
783e7706
MT
2818 return;
2819 }
a697a334 2820 virtio_queue_set_notification(vq, 0);
0c87e93e 2821 qemu_bh_schedule(q->tx_bh);
a697a334
AW
2822}
2823
fbe78f4f
AL
2824static void virtio_net_tx_timer(void *opaque)
2825{
0c87e93e
JW
2826 VirtIONetQueue *q = opaque;
2827 VirtIONet *n = q->n;
17a0ca55 2828 VirtIODevice *vdev = VIRTIO_DEVICE(n);
7550a822
LV
2829 int ret;
2830
e8bcf842
MT
2831 /* This happens when device was stopped but BH wasn't. */
2832 if (!vdev->vm_running) {
2833 /* Make sure tx waiting is set, so we'll run when restarted. */
2834 assert(q->tx_waiting);
2835 return;
2836 }
fbe78f4f 2837
0c87e93e 2838 q->tx_waiting = 0;
fbe78f4f
AL
2839
 2840 /* Just in case the driver is not ready any more */
17a0ca55 2841 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
fbe78f4f 2842 return;
17a0ca55 2843 }
fbe78f4f 2844
7550a822
LV
2845 ret = virtio_net_flush_tx(q);
2846 if (ret == -EBUSY || ret == -EINVAL) {
2847 return;
2848 }
2849 /*
2850 * If we flush a full burst of packets, assume there are
2851 * more coming and immediately rearm
2852 */
2853 if (ret >= n->tx_burst) {
2854 q->tx_waiting = 1;
2855 timer_mod(q->tx_timer,
2856 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2857 return;
2858 }
2859 /*
2860 * If less than a full burst, re-enable notification and flush
2861 * anything that may have come in while we weren't looking. If
2862 * we find something, assume the guest is still active and rearm
2863 */
0c87e93e 2864 virtio_queue_set_notification(q->tx_vq, 1);
7550a822
LV
2865 ret = virtio_net_flush_tx(q);
2866 if (ret > 0) {
2867 virtio_queue_set_notification(q->tx_vq, 0);
2868 q->tx_waiting = 1;
2869 timer_mod(q->tx_timer,
2870 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2871 }
fbe78f4f
AL
2872}
2873
a697a334
AW
2874static void virtio_net_tx_bh(void *opaque)
2875{
0c87e93e
JW
2876 VirtIONetQueue *q = opaque;
2877 VirtIONet *n = q->n;
17a0ca55 2878 VirtIODevice *vdev = VIRTIO_DEVICE(n);
a697a334
AW
2879 int32_t ret;
2880
e8bcf842
MT
2881 /* This happens when device was stopped but BH wasn't. */
2882 if (!vdev->vm_running) {
2883 /* Make sure tx waiting is set, so we'll run when restarted. */
2884 assert(q->tx_waiting);
2885 return;
2886 }
783e7706 2887
0c87e93e 2888 q->tx_waiting = 0;
a697a334
AW
2889
 2890 /* Just in case the driver is not ready any more */
17a0ca55 2891 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
a697a334 2892 return;
17a0ca55 2893 }
a697a334 2894
0c87e93e 2895 ret = virtio_net_flush_tx(q);
fa5e56c2
GK
2896 if (ret == -EBUSY || ret == -EINVAL) {
2897 return; /* Notification re-enable handled by tx_complete or device
2898 * broken */
a697a334
AW
2899 }
2900
2901 /* If we flush a full burst of packets, assume there are
2902 * more coming and immediately reschedule */
2903 if (ret >= n->tx_burst) {
0c87e93e
JW
2904 qemu_bh_schedule(q->tx_bh);
2905 q->tx_waiting = 1;
a697a334
AW
2906 return;
2907 }
2908
2909 /* If less than a full burst, re-enable notification and flush
2910 * anything that may have come in while we weren't looking. If
2911 * we find something, assume the guest is still active and reschedule */
0c87e93e 2912 virtio_queue_set_notification(q->tx_vq, 1);
fa5e56c2
GK
2913 ret = virtio_net_flush_tx(q);
2914 if (ret == -EINVAL) {
2915 return;
2916 } else if (ret > 0) {
0c87e93e
JW
2917 virtio_queue_set_notification(q->tx_vq, 0);
2918 qemu_bh_schedule(q->tx_bh);
2919 q->tx_waiting = 1;
a697a334
AW
2920 }
2921}
2922
f9d6dbf0
WC
2923static void virtio_net_add_queue(VirtIONet *n, int index)
2924{
2925 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2926
1c0fbfa3
MT
2927 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2928 virtio_net_handle_rx);
9b02e161 2929
f9d6dbf0
WC
2930 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2931 n->vqs[index].tx_vq =
9b02e161
WW
2932 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2933 virtio_net_handle_tx_timer);
f9d6dbf0
WC
2934 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2935 virtio_net_tx_timer,
2936 &n->vqs[index]);
2937 } else {
2938 n->vqs[index].tx_vq =
9b02e161
WW
2939 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2940 virtio_net_handle_tx_bh);
f63192b0
AB
2941 n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2942 &DEVICE(vdev)->mem_reentrancy_guard);
f9d6dbf0
WC
2943 }
2944
2945 n->vqs[index].tx_waiting = 0;
2946 n->vqs[index].n = n;
2947}
2948
2949static void virtio_net_del_queue(VirtIONet *n, int index)
2950{
2951 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2952 VirtIONetQueue *q = &n->vqs[index];
2953 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2954
2955 qemu_purge_queued_packets(nc);
2956
2957 virtio_del_queue(vdev, index * 2);
2958 if (q->tx_timer) {
f9d6dbf0 2959 timer_free(q->tx_timer);
f989c30c 2960 q->tx_timer = NULL;
f9d6dbf0
WC
2961 } else {
2962 qemu_bh_delete(q->tx_bh);
f989c30c 2963 q->tx_bh = NULL;
f9d6dbf0 2964 }
f989c30c 2965 q->tx_waiting = 0;
f9d6dbf0
WC
2966 virtio_del_queue(vdev, index * 2 + 1);
2967}
2968
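/* Virtqueues are laid out as rx/tx pairs (indices 2 * i and 2 * i + 1) with
 * the control vq last, so resizing removes the ctrl vq, adds or removes whole
 * pairs, and then re-adds the ctrl vq at the new tail position. */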
441537f1 2969static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
f9d6dbf0
WC
2970{
2971 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2972 int old_num_queues = virtio_get_num_queues(vdev);
441537f1 2973 int new_num_queues = new_max_queue_pairs * 2 + 1;
f9d6dbf0
WC
2974 int i;
2975
2976 assert(old_num_queues >= 3);
2977 assert(old_num_queues % 2 == 1);
2978
2979 if (old_num_queues == new_num_queues) {
2980 return;
2981 }
2982
2983 /*
2984 * We always need to remove and add ctrl vq if
2985 * old_num_queues != new_num_queues. Remove ctrl_vq first,
20f86a75 2986 * and then we only enter one of the following two loops.
f9d6dbf0
WC
2987 */
2988 virtio_del_queue(vdev, old_num_queues - 1);
2989
2990 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2991 /* new_num_queues < old_num_queues */
2992 virtio_net_del_queue(n, i / 2);
2993 }
2994
2995 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2996 /* new_num_queues > old_num_queues */
2997 virtio_net_add_queue(n, i / 2);
2998 }
2999
3000 /* add ctrl_vq last */
3001 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3002}
3003
ec57db16 3004static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
fed699f9 3005{
441537f1 3006 int max = multiqueue ? n->max_queue_pairs : 1;
f9d6dbf0 3007
fed699f9 3008 n->multiqueue = multiqueue;
441537f1 3009 virtio_net_change_num_queue_pairs(n, max);
fed699f9 3010
441537f1 3011 virtio_net_set_queue_pairs(n);
fed699f9
JW
3012}
3013
982b78c5 3014static int virtio_net_post_load_device(void *opaque, int version_id)
037dab2f 3015{
982b78c5
DDAG
3016 VirtIONet *n = opaque;
3017 VirtIODevice *vdev = VIRTIO_DEVICE(n);
037dab2f 3018 int i, link_down;
fbe78f4f 3019
9d8c6a25 3020 trace_virtio_net_post_load_device();
982b78c5 3021 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
95129d6f 3022 virtio_vdev_has_feature(vdev,
e22f0603
YB
3023 VIRTIO_F_VERSION_1),
3024 virtio_vdev_has_feature(vdev,
3025 VIRTIO_NET_F_HASH_REPORT));
fbe78f4f 3026
76010cb3 3027 /* MAC_TABLE_ENTRIES may be different from the saved image */
982b78c5 3028 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
76010cb3 3029 n->mac_table.in_use = 0;
b6503ed9 3030 }
0ce0e8f4 3031
982b78c5 3032 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
6c666823
MT
3033 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3034 }
3035
7788c3f2
MS
3036 /*
3037 * curr_guest_offloads will be later overwritten by the
3038 * virtio_set_features_nocheck call done from the virtio_load.
3039 * Here we make sure it is preserved and restored accordingly
3040 * in the virtio_net_post_load_virtio callback.
3041 */
3042 n->saved_guest_offloads = n->curr_guest_offloads;
6c666823 3043
441537f1 3044 virtio_net_set_queue_pairs(n);
5f800801 3045
2d9aba39
AW
3046 /* Find the first multicast entry in the saved MAC filter */
3047 for (i = 0; i < n->mac_table.in_use; i++) {
3048 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3049 break;
3050 }
3051 }
3052 n->mac_table.first_multi = i;
98991481
AK
3053
3054 /* nc.link_down can't be migrated, so infer link_down according
3055 * to link status bit in n->status */
5f800801 3056 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
441537f1 3057 for (i = 0; i < n->max_queue_pairs; i++) {
5f800801
JW
3058 qemu_get_subqueue(n->nic, i)->link_down = link_down;
3059 }
98991481 3060
6c666823
MT
3061 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3062 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
9d8c6a25
DDAG
3063 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3064 QEMU_CLOCK_VIRTUAL,
3065 virtio_net_announce_timer, n);
3066 if (n->announce_timer.round) {
3067 timer_mod(n->announce_timer.tm,
3068 qemu_clock_get_ms(n->announce_timer.type));
3069 } else {
944458b6 3070 qemu_announce_timer_del(&n->announce_timer, false);
9d8c6a25 3071 }
6c666823
MT
3072 }
3073
e41b7114 3074 if (n->rss_data.enabled) {
0145c393
AM
3075 n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
3076 if (!n->rss_data.populate_hash) {
3077 if (!virtio_net_attach_epbf_rss(n)) {
3078 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
3079 warn_report("Can't post-load eBPF RSS for vhost");
3080 } else {
3081 warn_report("Can't post-load eBPF RSS - "
3082 "fallback to software RSS");
3083 n->rss_data.enabled_software_rss = true;
3084 }
3085 }
3086 }
3087
e41b7114
YB
3088 trace_virtio_net_rss_enable(n->rss_data.hash_types,
3089 n->rss_data.indirections_len,
3090 sizeof(n->rss_data.key));
3091 } else {
3092 trace_virtio_net_rss_disable();
3093 }
fbe78f4f
AL
3094 return 0;
3095}
3096
7788c3f2
MS
3097static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3098{
3099 VirtIONet *n = VIRTIO_NET(vdev);
3100 /*
3101 * The actual needed state is now in saved_guest_offloads,
3102 * see virtio_net_post_load_device for detail.
3103 * Restore it back and apply the desired offloads.
3104 */
3105 n->curr_guest_offloads = n->saved_guest_offloads;
3106 if (peer_has_vnet_hdr(n)) {
3107 virtio_net_apply_guest_offloads(n);
3108 }
3109
3110 return 0;
3111}
3112
982b78c5
DDAG
3113/* tx_waiting field of a VirtIONetQueue */
3114static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3115 .name = "virtio-net-queue-tx_waiting",
1de81b42 3116 .fields = (const VMStateField[]) {
982b78c5
DDAG
3117 VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3118 VMSTATE_END_OF_LIST()
3119 },
3120};
3121
441537f1 3122static bool max_queue_pairs_gt_1(void *opaque, int version_id)
982b78c5 3123{
441537f1 3124 return VIRTIO_NET(opaque)->max_queue_pairs > 1;
982b78c5
DDAG
3125}
3126
3127static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3128{
3129 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3130 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3131}
3132
3133static bool mac_table_fits(void *opaque, int version_id)
3134{
3135 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3136}
3137
3138static bool mac_table_doesnt_fit(void *opaque, int version_id)
3139{
3140 return !mac_table_fits(opaque, version_id);
3141}
3142
3143/* This temporary type is shared by all the WITH_TMP methods
3144 * although only some fields are used by each.
3145 */
3146struct VirtIONetMigTmp {
3147 VirtIONet *parent;
3148 VirtIONetQueue *vqs_1;
441537f1 3149 uint16_t curr_queue_pairs_1;
982b78c5
DDAG
3150 uint8_t has_ufo;
3151 uint32_t has_vnet_hdr;
3152};
3153
3154/* The 2nd and subsequent tx_waiting flags are loaded later than
441537f1 3155 * the 1st entry in the queue_pairs and only if there's more than one
982b78c5
DDAG
3156 * entry. We use the tmp mechanism to calculate a temporary
3157 * pointer and count and also validate the count.
3158 */
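/* For example, with curr_queue_pairs == 4 the pre_save/pre_load hooks below
 * set vqs_1 = &vqs[1] and curr_queue_pairs_1 = 3, so the remaining three
 * tx_waiting values are saved and loaded right after the first one. */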
3159
44b1ff31 3160static int virtio_net_tx_waiting_pre_save(void *opaque)
982b78c5
DDAG
3161{
3162 struct VirtIONetMigTmp *tmp = opaque;
3163
3164 tmp->vqs_1 = tmp->parent->vqs + 1;
441537f1
JW
3165 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3166 if (tmp->parent->curr_queue_pairs == 0) {
3167 tmp->curr_queue_pairs_1 = 0;
982b78c5 3168 }
44b1ff31
DDAG
3169
3170 return 0;
982b78c5
DDAG
3171}
3172
3173static int virtio_net_tx_waiting_pre_load(void *opaque)
3174{
3175 struct VirtIONetMigTmp *tmp = opaque;
3176
3177 /* Reuse the pointer setup from save */
3178 virtio_net_tx_waiting_pre_save(opaque);
3179
441537f1
JW
3180 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3181 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3182 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
982b78c5
DDAG
3183
3184 return -EINVAL;
3185 }
3186
3187 return 0; /* all good */
3188}
3189
3190static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3191 .name = "virtio-net-tx_waiting",
3192 .pre_load = virtio_net_tx_waiting_pre_load,
3193 .pre_save = virtio_net_tx_waiting_pre_save,
1de81b42 3194 .fields = (const VMStateField[]) {
982b78c5 3195 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
441537f1 3196 curr_queue_pairs_1,
982b78c5
DDAG
3197 vmstate_virtio_net_queue_tx_waiting,
3198 struct VirtIONetQueue),
3199 VMSTATE_END_OF_LIST()
3200 },
3201};
3202
3203/* the 'has_ufo' flag is just tested; if the incoming stream has the
3204 * flag set we need to check that we have it
3205 */
3206static int virtio_net_ufo_post_load(void *opaque, int version_id)
3207{
3208 struct VirtIONetMigTmp *tmp = opaque;
3209
3210 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3211 error_report("virtio-net: saved image requires TUN_F_UFO support");
3212 return -EINVAL;
3213 }
3214
3215 return 0;
3216}
3217
44b1ff31 3218static int virtio_net_ufo_pre_save(void *opaque)
982b78c5
DDAG
3219{
3220 struct VirtIONetMigTmp *tmp = opaque;
3221
3222 tmp->has_ufo = tmp->parent->has_ufo;
44b1ff31
DDAG
3223
3224 return 0;
982b78c5
DDAG
3225}
3226
3227static const VMStateDescription vmstate_virtio_net_has_ufo = {
3228 .name = "virtio-net-ufo",
3229 .post_load = virtio_net_ufo_post_load,
3230 .pre_save = virtio_net_ufo_pre_save,
1de81b42 3231 .fields = (const VMStateField[]) {
982b78c5
DDAG
3232 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3233 VMSTATE_END_OF_LIST()
3234 },
3235};
3236
3237/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3238 * flag set we need to check that we have it
3239 */
3240static int virtio_net_vnet_post_load(void *opaque, int version_id)
3241{
3242 struct VirtIONetMigTmp *tmp = opaque;
3243
3244 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3245 error_report("virtio-net: saved image requires vnet_hdr=on");
3246 return -EINVAL;
3247 }
3248
3249 return 0;
3250}
3251
44b1ff31 3252static int virtio_net_vnet_pre_save(void *opaque)
982b78c5
DDAG
3253{
3254 struct VirtIONetMigTmp *tmp = opaque;
3255
3256 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
44b1ff31
DDAG
3257
3258 return 0;
982b78c5
DDAG
3259}
3260
3261static const VMStateDescription vmstate_virtio_net_has_vnet = {
3262 .name = "virtio-net-vnet",
3263 .post_load = virtio_net_vnet_post_load,
3264 .pre_save = virtio_net_vnet_pre_save,
1de81b42 3265 .fields = (const VMStateField[]) {
982b78c5
DDAG
3266 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3267 VMSTATE_END_OF_LIST()
3268 },
3269};
3270
e41b7114
YB
3271static bool virtio_net_rss_needed(void *opaque)
3272{
3273 return VIRTIO_NET(opaque)->rss_data.enabled;
3274}
3275
3276static const VMStateDescription vmstate_virtio_net_rss = {
3277 .name = "virtio-net-device/rss",
3278 .version_id = 1,
3279 .minimum_version_id = 1,
3280 .needed = virtio_net_rss_needed,
1de81b42 3281 .fields = (const VMStateField[]) {
e41b7114
YB
3282 VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3283 VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3284 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3285 VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3286 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3287 VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3288 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3289 VIRTIO_NET_RSS_MAX_KEY_SIZE),
3290 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3291 rss_data.indirections_len, 0,
3292 vmstate_info_uint16, uint16_t),
3293 VMSTATE_END_OF_LIST()
3294 },
3295};
3296
982b78c5
DDAG
3297static const VMStateDescription vmstate_virtio_net_device = {
3298 .name = "virtio-net-device",
3299 .version_id = VIRTIO_NET_VM_VERSION,
3300 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3301 .post_load = virtio_net_post_load_device,
1de81b42 3302 .fields = (const VMStateField[]) {
982b78c5
DDAG
3303 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3304 VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3305 vmstate_virtio_net_queue_tx_waiting,
3306 VirtIONetQueue),
3307 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3308 VMSTATE_UINT16(status, VirtIONet),
3309 VMSTATE_UINT8(promisc, VirtIONet),
3310 VMSTATE_UINT8(allmulti, VirtIONet),
3311 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3312
3313 /* Guarded pair: If it fits we load it, else we throw it away
 3314 * - can happen if the source has a larger MAC table; post-load
3315 * sets flags in this case.
3316 */
3317 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3318 0, mac_table_fits, mac_table.in_use,
3319 ETH_ALEN),
3320 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3321 mac_table.in_use, ETH_ALEN),
3322
3323 /* Note: This is an array of uint32's that's always been saved as a
3324 * buffer; hold onto your endiannesses; it's actually used as a bitmap
3325 * but based on the uint.
3326 */
3327 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3328 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3329 vmstate_virtio_net_has_vnet),
3330 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3331 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3332 VMSTATE_UINT8(alluni, VirtIONet),
3333 VMSTATE_UINT8(nomulti, VirtIONet),
3334 VMSTATE_UINT8(nouni, VirtIONet),
3335 VMSTATE_UINT8(nobcast, VirtIONet),
3336 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3337 vmstate_virtio_net_has_ufo),
441537f1 3338 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
982b78c5 3339 vmstate_info_uint16_equal, uint16_t),
441537f1 3340 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
982b78c5
DDAG
3341 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3342 vmstate_virtio_net_tx_waiting),
3343 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3344 has_ctrl_guest_offloads),
3345 VMSTATE_END_OF_LIST()
1de81b42
RH
3346 },
3347 .subsections = (const VMStateDescription * const []) {
e41b7114
YB
3348 &vmstate_virtio_net_rss,
3349 NULL
3350 }
982b78c5
DDAG
3351};
3352
eb6b6c12 3353static NetClientInfo net_virtio_info = {
f394b2e2 3354 .type = NET_CLIENT_DRIVER_NIC,
eb6b6c12
MM
3355 .size = sizeof(NICState),
3356 .can_receive = virtio_net_can_receive,
3357 .receive = virtio_net_receive,
eb6b6c12 3358 .link_status_changed = virtio_net_set_link_status,
b1be4280 3359 .query_rx_filter = virtio_net_query_rxfilter,
b2c929f0 3360 .announce = virtio_net_announce,
eb6b6c12
MM
3361};
3362
f56a1247
MT
3363static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3364{
17a0ca55 3365 VirtIONet *n = VIRTIO_NET(vdev);
68b0a639 3366 NetClientState *nc;
f56a1247 3367 assert(n->vhost_started);
68b0a639
SWL
3368 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3369 /* Must guard against invalid features and bogus queue index
3370 * from being set by malicious guest, or penetrated through
3371 * buggy migration stream.
3372 */
3373 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3374 qemu_log_mask(LOG_GUEST_ERROR,
3375 "%s: bogus vq index ignored\n", __func__);
3376 return false;
3377 }
3378 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3379 } else {
3380 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3381 }
544f0278
CL
3382 /*
 3383 * Check for the configure interrupt. VIRTIO_CONFIG_IRQ_IDX (-1) is used
7e8094f0 3384 * as the index of the configure interrupt; if the driver does not
544f0278
CL
 3385 * support it, this function returns false.
3386 */
3387
3388 if (idx == VIRTIO_CONFIG_IRQ_IDX) {
8aab0d1d 3389 return vhost_net_config_pending(get_vhost_net(nc->peer));
544f0278 3390 }
ed8b4afe 3391 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
f56a1247
MT
3392}
3393
3394static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3395 bool mask)
3396{
17a0ca55 3397 VirtIONet *n = VIRTIO_NET(vdev);
68b0a639 3398 NetClientState *nc;
f56a1247 3399 assert(n->vhost_started);
68b0a639
SWL
3400 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3401 /* Must guard against invalid features and bogus queue index
3402 * from being set by malicious guest, or penetrated through
3403 * buggy migration stream.
3404 */
3405 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3406 qemu_log_mask(LOG_GUEST_ERROR,
3407 "%s: bogus vq index ignored\n", __func__);
3408 return;
3409 }
3410 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3411 } else {
3412 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3413 }
544f0278
CL
3414 /*
 3415 * Check for the configure interrupt. VIRTIO_CONFIG_IRQ_IDX (-1) is used
7e8094f0 3416 * as the index of the configure interrupt; if the driver does not
544f0278
CL
 3417 * support it, this function returns early.
3418 */
3419
3420 if (idx == VIRTIO_CONFIG_IRQ_IDX) {
8aab0d1d 3421 vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
544f0278
CL
3422 return;
3423 }
544f0278 3424 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
f56a1247
MT
3425}
3426
019a3edb 3427static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
fbe78f4f 3428{
0cd09c3a 3429 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
a93e599d 3430
d74c30c8 3431 n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
17ec5a86
FK
3432}
3433
8a253ec2
FK
3434void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3435 const char *type)
3436{
3437 /*
3438 * The name may be NULL; in that case the netclient name will be type.x.
3439 */
3440 assert(type != NULL);
3441
9e288406 3442 g_free(n->netclient_name);
9e288406 3443 g_free(n->netclient_type);
80e0090a 3444 n->netclient_name = g_strdup(name);
8a253ec2
FK
3445 n->netclient_type = g_strdup(type);
3446}
3447
0e9a65c5 3448static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
9711cd0d
JF
3449{
3450 HotplugHandler *hotplug_ctrl;
3451 PCIDevice *pci_dev;
3452 Error *err = NULL;
3453
0e9a65c5 3454 hotplug_ctrl = qdev_get_hotplug_handler(dev);
9711cd0d 3455 if (hotplug_ctrl) {
0e9a65c5 3456 pci_dev = PCI_DEVICE(dev);
9711cd0d 3457 pci_dev->partially_hotplugged = true;
0e9a65c5 3458 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
9711cd0d
JF
3459 if (err) {
3460 error_report_err(err);
3461 return false;
3462 }
3463 } else {
3464 return false;
3465 }
3466 return true;
3467}
3468
0e9a65c5
JQ
3469static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3470 Error **errp)
9711cd0d 3471{
5a0948d3 3472 Error *err = NULL;
9711cd0d 3473 HotplugHandler *hotplug_ctrl;
0e9a65c5 3474 PCIDevice *pdev = PCI_DEVICE(dev);
78274682 3475 BusState *primary_bus;
9711cd0d
JF
3476
3477 if (!pdev->partially_hotplugged) {
3478 return true;
3479 }
0e9a65c5 3480 primary_bus = dev->parent_bus;
78274682 3481 if (!primary_bus) {
150ab54a 3482 error_setg(errp, "virtio_net: couldn't find primary bus");
5a0948d3 3483 return false;
9711cd0d 3484 }
0e9a65c5 3485 qdev_set_parent_bus(dev, primary_bus, &error_abort);
e2bde83e 3486 qatomic_set(&n->failover_primary_hidden, false);
0e9a65c5 3487 hotplug_ctrl = qdev_get_hotplug_handler(dev);
150ab54a 3488 if (hotplug_ctrl) {
0e9a65c5 3489 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
5a0948d3
MA
3490 if (err) {
3491 goto out;
3492 }
0e9a65c5 3493 hotplug_handler_plug(hotplug_ctrl, dev, &err);
150ab54a 3494 }
109c20ea 3495 pdev->partially_hotplugged = false;
150ab54a
JF
3496
3497out:
5a0948d3
MA
3498 error_propagate(errp, err);
3499 return !err;
9711cd0d
JF
3500}
3501
07a5d816 3502static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
9711cd0d
JF
3503{
3504 bool should_be_hidden;
3505 Error *err = NULL;
07a5d816 3506 DeviceState *dev = failover_find_primary_device(n);
9711cd0d 3507
07a5d816
JQ
3508 if (!dev) {
3509 return;
9711cd0d
JF
3510 }
3511
07a5d816
JQ
3512 should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3513
4dbac1ae 3514 if (migration_in_setup(s) && !should_be_hidden) {
07a5d816
JQ
3515 if (failover_unplug_primary(n, dev)) {
3516 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3517 qapi_event_send_unplug_primary(dev->id);
e2bde83e 3518 qatomic_set(&n->failover_primary_hidden, true);
9711cd0d
JF
3519 } else {
3520 warn_report("couldn't unplug primary device");
3521 }
3522 } else if (migration_has_failed(s)) {
150ab54a 3523 /* We already unplugged the device; let's plug it back. */
07a5d816 3524 if (!failover_replug_primary(n, dev, &err)) {
9711cd0d
JF
3525 if (err) {
3526 error_report_err(err);
3527 }
3528 }
3529 }
3530}
3531
3532static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3533{
3534 MigrationState *s = data;
3535 VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3536 virtio_net_handle_migration_primary(n, s);
3537}
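/*
 * Failover flow sketch (as implemented above): when migration enters its
 * setup phase and the primary is not yet hidden, the primary device gets
 * an unplug request, its vmstate is unregistered and an UNPLUG_PRIMARY
 * QAPI event is emitted; if the migration later fails, the primary is
 * plugged back via failover_replug_primary().
 */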
3538
b91ad981 3539static bool failover_hide_primary_device(DeviceListener *listener,
f3558b1b
KW
3540 const QDict *device_opts,
3541 bool from_json,
3542 Error **errp)
9711cd0d
JF
3543{
3544 VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
4f0303ae 3545 const char *standby_id;
9711cd0d 3546
4d0e59ac 3547 if (!device_opts) {
89631fed 3548 return false;
4d0e59ac 3549 }
bcfc906b
LV
3550
3551 if (!qdict_haskey(device_opts, "failover_pair_id")) {
3552 return false;
3553 }
3554
3555 if (!qdict_haskey(device_opts, "id")) {
3556 error_setg(errp, "Device with failover_pair_id needs to have id");
3557 return false;
3558 }
3559
3560 standby_id = qdict_get_str(device_opts, "failover_pair_id");
89631fed
JQ
3561 if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3562 return false;
9711cd0d
JF
3563 }
3564
7fe7791e
LV
3565 /*
3566 * The hide helper can be called several times for a given device.
3567 * Check that there is only one primary for a virtio-net device, but
3568 * don't clone the qdict again if the helper is called for the same
3569 * device.
3570 */
259a10db 3571 if (n->primary_opts) {
7fe7791e
LV
3572 const char *old, *new;
3573 /* devices with failover_pair_id always have an id */
3574 old = qdict_get_str(n->primary_opts, "id");
3575 new = qdict_get_str(device_opts, "id");
3576 if (strcmp(old, new) != 0) {
3577 error_setg(errp, "Cannot attach more than one primary device to "
3578 "'%s': '%s' and '%s'", n->netclient_name, old, new);
3579 return false;
3580 }
3581 } else {
3582 n->primary_opts = qdict_clone_shallow(device_opts);
3583 n->primary_opts_from_json = from_json;
259a10db
KW
3584 }
3585
e2bde83e 3586 /* failover_primary_hidden is set during feature negotiation */
3abad4a2 3587 return qatomic_read(&n->failover_primary_hidden);
9711cd0d
JF
3588}
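/*
 * Rough usage sketch (the IDs and PCI address below are hypothetical):
 * the standby virtio-net device is created with failover=on, and the
 * primary device references it via failover_pair_id, which is what the
 * hide helper above matches against the netclient name:
 *
 *   -device virtio-net-pci,netdev=nd0,id=standby0,failover=on
 *   -device vfio-pci,host=0000:01:00.0,id=primary0,failover_pair_id=standby0
 */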
3589
e6f746b3 3590static void virtio_net_device_realize(DeviceState *dev, Error **errp)
17ec5a86 3591{
e6f746b3 3592 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
284a32f0 3593 VirtIONet *n = VIRTIO_NET(dev);
b1be4280 3594 NetClientState *nc;
284a32f0 3595 int i;
1773d9ee 3596
a93e599d 3597 if (n->net_conf.mtu) {
127833ee 3598 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
a93e599d
MC
3599 }
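/* e.g. a hypothetical "-device virtio-net-pci,host_mtu=9000" ends up here
 * via net_conf.mtu and advertises VIRTIO_NET_F_MTU to the guest. */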
3600
9473939e
JB
3601 if (n->net_conf.duplex_str) {
3602 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3603 n->net_conf.duplex = DUPLEX_HALF;
3604 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3605 n->net_conf.duplex = DUPLEX_FULL;
3606 } else {
3607 error_setg(errp, "'duplex' must be 'half' or 'full'");
843c4cfc 3608 return;
9473939e
JB
3609 }
3610 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3611 } else {
3612 n->net_conf.duplex = DUPLEX_UNKNOWN;
3613 }
3614
3615 if (n->net_conf.speed < SPEED_UNKNOWN) {
3616 error_setg(errp, "'speed' must be between 0 and INT_MAX");
843c4cfc
MA
3617 return;
3618 }
3619 if (n->net_conf.speed >= 0) {
9473939e
JB
3620 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3621 }
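/*
 * Illustration (hypothetical values): "speed=10000,duplex=full" takes
 * both branches above and advertises VIRTIO_NET_F_SPEED_DUPLEX; leaving
 * both properties unset keeps speed/duplex unknown and the feature bit
 * cleared.
 */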
3622
9711cd0d 3623 if (n->failover) {
b91ad981 3624 n->primary_listener.hide_device = failover_hide_primary_device;
e2bde83e 3625 qatomic_set(&n->failover_primary_hidden, true);
9711cd0d 3626 device_listener_register(&n->primary_listener);
d9cda213
SS
3627 migration_add_notifier(&n->migration_state,
3628 virtio_net_migration_state_notifier);
9711cd0d
JF
3629 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3630 }
3631
da3e8a23 3632 virtio_net_set_config_size(n, n->host_features);
3857cd5c 3633 virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
fbe78f4f 3634
1c0fbfa3
MT
3635 /*
3636 * We set a lower limit on RX queue size to what it always was.
3637 * Guests that want a smaller ring can always resize it without
3638 * help from us (using virtio 1 and up).
3639 */
3640 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3641 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
5f997fd1 3642 !is_power_of_2(n->net_conf.rx_queue_size)) {
1c0fbfa3
MT
3643 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3644 "must be a power of 2 between %d and %d.",
3645 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3646 VIRTQUEUE_MAX_SIZE);
3647 virtio_cleanup(vdev);
3648 return;
3649 }
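/*
 * Example (hypothetical): rx_queue_size=512 satisfies the check above
 * (a power of 2 between 256 and VIRTQUEUE_MAX_SIZE), whereas
 * rx_queue_size=300 would be rejected.
 */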
3650
9b02e161 3651 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
4271f403 3652 n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
9b02e161
WW
3653 !is_power_of_2(n->net_conf.tx_queue_size)) {
3654 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3655 "must be a power of 2 between %d and %d",
3656 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
4271f403 3657 virtio_net_max_tx_queue_size(n));
9b02e161
WW
3658 virtio_cleanup(vdev);
3659 return;
3660 }
3661
22288fe5
JW
3662 n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3663
3664 /*
3665 * Figure out the datapath queue pairs since the backend could
3666 * provide a control queue via peers as well.
3667 */
3668 if (n->nic_conf.peers.queues) {
3669 for (i = 0; i < n->max_ncs; i++) {
3670 if (n->nic_conf.peers.ncs[i]->is_datapath) {
3671 ++n->max_queue_pairs;
3672 }
3673 }
3674 }
3675 n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3676
441537f1 3677 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
22288fe5 3678 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
631b22ea 3679 "must be a positive integer less than %d.",
441537f1 3680 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
7e0e736e
JW
3681 virtio_cleanup(vdev);
3682 return;
3683 }
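/*
 * Each queue pair consumes two virtqueues (RX and TX), and one more vq is
 * added further down for the control queue, hence the
 * 2 * max_queue_pairs + 1 bound against VIRTIO_QUEUE_MAX checked above.
 */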
b21e2380 3684 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
441537f1 3685 n->curr_queue_pairs = 1;
1773d9ee 3686 n->tx_timeout = n->net_conf.txtimer;
a697a334 3687
1773d9ee
FK
3688 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3689 && strcmp(n->net_conf.tx, "bh")) {
0765691e
MA
3690 warn_report("virtio-net: "
3691 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3692 n->net_conf.tx);
3693 error_printf("Defaulting to \"bh\"");
a697a334
AW
3694 }
3695
2eef278b
MT
3696 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3697 n->net_conf.tx_queue_size);
9b02e161 3698
441537f1 3699 for (i = 0; i < n->max_queue_pairs; i++) {
f9d6dbf0 3700 virtio_net_add_queue(n, i);
a697a334 3701 }
da51a335 3702
17a0ca55 3703 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
1773d9ee
FK
3704 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3705 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
554c97dd 3706 n->status = VIRTIO_NET_S_LINK_UP;
9d8c6a25
DDAG
3707 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3708 QEMU_CLOCK_VIRTUAL,
3709 virtio_net_announce_timer, n);
b2c929f0 3710 n->announce_timer.round = 0;
fbe78f4f 3711
8a253ec2
FK
3712 if (n->netclient_type) {
3713 /*
3714 * Happens when virtio_net_set_netclient_name has been called.
3715 */
3716 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
7d0fefdf
AO
3717 n->netclient_type, n->netclient_name,
3718 &dev->mem_reentrancy_guard, n);
8a253ec2
FK
3719 } else {
3720 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
7d0fefdf
AO
3721 object_get_typename(OBJECT(dev)), dev->id,
3722 &dev->mem_reentrancy_guard, n);
8a253ec2
FK
3723 }
3724
441537f1 3725 for (i = 0; i < n->max_queue_pairs; i++) {
d4c62930
BM
3726 n->nic->ncs[i].do_not_pad = true;
3727 }
3728
6e371ab8
MT
3729 peer_test_vnet_hdr(n);
3730 if (peer_has_vnet_hdr(n)) {
441537f1 3731 for (i = 0; i < n->max_queue_pairs; i++) {
d6085e3a 3732 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
fed699f9 3733 }
6e371ab8
MT
3734 n->host_hdr_len = sizeof(struct virtio_net_hdr);
3735 } else {
3736 n->host_hdr_len = 0;
3737 }
eb6b6c12 3738
1773d9ee 3739 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
96d5e201 3740
fed699f9 3741 n->vqs[0].tx_waiting = 0;
1773d9ee 3742 n->tx_burst = n->net_conf.txburst;
e22f0603 3743 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
002437cd 3744 n->promisc = 1; /* for compatibility */
fbe78f4f 3745
7267c094 3746 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
b6503ed9 3747
7267c094 3748 n->vlans = g_malloc0(MAX_VLAN >> 3);
f21c0ed9 3749
b1be4280
AK
3750 nc = qemu_get_queue(n->nic);
3751 nc->rxfilter_notify_enabled = 1;
3752
e87936ea
CL
3753 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3754 struct virtio_net_config netcfg = {};
3755 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3756 vhost_net_set_config(get_vhost_net(nc->peer),
f8ed3648 3757 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
e87936ea 3758 }
2974e916 3759 QTAILQ_INIT(&n->rsc_chains);
284a32f0 3760 n->qdev = dev;
4474e37a 3761
aac8f89d 3762 net_rx_pkt_init(&n->rx_pkt);
0145c393
AM
3763
3764 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3765 virtio_net_load_ebpf(n);
3766 }
17ec5a86
FK
3767}
3768
b69c3c21 3769static void virtio_net_device_unrealize(DeviceState *dev)
17ec5a86 3770{
306ec6c3
AF
3771 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3772 VirtIONet *n = VIRTIO_NET(dev);
441537f1 3773 int i, max_queue_pairs;
17ec5a86 3774
0145c393
AM
3775 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3776 virtio_net_unload_ebpf(n);
3777 }
3778
17ec5a86
FK
3779 /* This will stop vhost backend if appropriate. */
3780 virtio_net_set_status(vdev, 0);
3781
9e288406
MA
3782 g_free(n->netclient_name);
3783 n->netclient_name = NULL;
3784 g_free(n->netclient_type);
3785 n->netclient_type = NULL;
8a253ec2 3786
17ec5a86
FK
3787 g_free(n->mac_table.macs);
3788 g_free(n->vlans);
3789
9711cd0d 3790 if (n->failover) {
f3558b1b 3791 qobject_unref(n->primary_opts);
65018100 3792 device_listener_unregister(&n->primary_listener);
d9cda213 3793 migration_remove_notifier(&n->migration_state);
f3558b1b
KW
3794 } else {
3795 assert(n->primary_opts == NULL);
9711cd0d
JF
3796 }
3797
441537f1
JW
3798 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3799 for (i = 0; i < max_queue_pairs; i++) {
f9d6dbf0 3800 virtio_net_del_queue(n, i);
17ec5a86 3801 }
d945d9f1 3802 /* delete also control vq */
441537f1 3803 virtio_del_queue(vdev, max_queue_pairs * 2);
944458b6 3804 qemu_announce_timer_del(&n->announce_timer, false);
17ec5a86
FK
3805 g_free(n->vqs);
3806 qemu_del_nic(n->nic);
2974e916 3807 virtio_net_rsc_cleanup(n);
59079029 3808 g_free(n->rss_data.indirections_table);
4474e37a 3809 net_rx_pkt_uninit(n->rx_pkt);
6a1a8cc7 3810 virtio_cleanup(vdev);
17ec5a86
FK
3811}
3812
3813static void virtio_net_instance_init(Object *obj)
3814{
3815 VirtIONet *n = VIRTIO_NET(obj);
3816
3817 /*
3818 * The default config_size is sizeof(struct virtio_net_config).
2431f4f1 3819 * It can be overridden with virtio_net_set_config_size().
17ec5a86
FK
3820 */
3821 n->config_size = sizeof(struct virtio_net_config);
aa4197c3
GA
3822 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3823 "bootindex", "/ethernet-phy@0",
40c2281c 3824 DEVICE(n));
0145c393
AM
3825
3826 ebpf_rss_init(&n->ebpf_rss);
17ec5a86
FK
3827}
3828
44b1ff31 3829static int virtio_net_pre_save(void *opaque)
4d45dcfb
HP
3830{
3831 VirtIONet *n = opaque;
3832
3833 /* At this point, the backend must be stopped, otherwise
3834 * it might keep writing to memory. */
3835 assert(!n->vhost_started);
44b1ff31
DDAG
3836
3837 return 0;
4d45dcfb
HP
3838}
3839
9711cd0d
JF
3840static bool primary_unplug_pending(void *opaque)
3841{
3842 DeviceState *dev = opaque;
21e8709b 3843 DeviceState *primary;
9711cd0d
JF
3844 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3845 VirtIONet *n = VIRTIO_NET(vdev);
3846
284f42a5
JF
3847 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3848 return false;
3849 }
21e8709b
JQ
3850 primary = failover_find_primary_device(n);
3851 return primary ? primary->pending_deleted_event : false;
9711cd0d
JF
3852}
3853
3854static bool dev_unplug_pending(void *opaque)
3855{
3856 DeviceState *dev = opaque;
3857 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3858
3859 return vdc->primary_unplug_pending(dev);
3860}
3861
c255488d
JP
3862static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3863{
3864 VirtIONet *n = VIRTIO_NET(vdev);
3865 NetClientState *nc = qemu_get_queue(n->nic);
3866 struct vhost_net *net = get_vhost_net(nc->peer);
3867 return &net->dev;
3868}
3869
4d45dcfb
HP
3870static const VMStateDescription vmstate_virtio_net = {
3871 .name = "virtio-net",
3872 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3873 .version_id = VIRTIO_NET_VM_VERSION,
1de81b42 3874 .fields = (const VMStateField[]) {
4d45dcfb
HP
3875 VMSTATE_VIRTIO_DEVICE,
3876 VMSTATE_END_OF_LIST()
3877 },
3878 .pre_save = virtio_net_pre_save,
9711cd0d 3879 .dev_unplug_pending = dev_unplug_pending,
4d45dcfb 3880};
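/*
 * Reader's note: the bulk of the device state is migrated through
 * vmstate_virtio_net_device (installed as vdc->vmsd in
 * virtio_net_class_init() below); this wrapper only carries the common
 * VMSTATE_VIRTIO_DEVICE section plus the pre_save and dev_unplug_pending
 * hooks used for the failover primary.
 */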
290c2428 3881
17ec5a86 3882static Property virtio_net_properties[] = {
127833ee
JB
3883 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3884 VIRTIO_NET_F_CSUM, true),
3885 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
87108bb2 3886 VIRTIO_NET_F_GUEST_CSUM, true),
127833ee
JB
3887 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3888 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
87108bb2 3889 VIRTIO_NET_F_GUEST_TSO4, true),
127833ee 3890 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
87108bb2 3891 VIRTIO_NET_F_GUEST_TSO6, true),
127833ee 3892 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
87108bb2 3893 VIRTIO_NET_F_GUEST_ECN, true),
127833ee 3894 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
87108bb2 3895 VIRTIO_NET_F_GUEST_UFO, true),
127833ee 3896 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
87108bb2 3897 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
127833ee 3898 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
87108bb2 3899 VIRTIO_NET_F_HOST_TSO4, true),
127833ee 3900 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
87108bb2 3901 VIRTIO_NET_F_HOST_TSO6, true),
127833ee 3902 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
87108bb2 3903 VIRTIO_NET_F_HOST_ECN, true),
127833ee 3904 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
87108bb2 3905 VIRTIO_NET_F_HOST_UFO, true),
127833ee 3906 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
87108bb2 3907 VIRTIO_NET_F_MRG_RXBUF, true),
127833ee 3908 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
87108bb2 3909 VIRTIO_NET_F_STATUS, true),
127833ee 3910 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
87108bb2 3911 VIRTIO_NET_F_CTRL_VQ, true),
127833ee 3912 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
87108bb2 3913 VIRTIO_NET_F_CTRL_RX, true),
127833ee 3914 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
87108bb2 3915 VIRTIO_NET_F_CTRL_VLAN, true),
127833ee 3916 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
87108bb2 3917 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
127833ee 3918 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
87108bb2 3919 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
127833ee 3920 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
87108bb2 3921 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
127833ee 3922 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
59079029
YB
3923 DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3924 VIRTIO_NET_F_RSS, false),
e22f0603
YB
3925 DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3926 VIRTIO_NET_F_HASH_REPORT, false),
2974e916
YB
3927 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3928 VIRTIO_NET_F_RSC_EXT, false),
3929 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3930 VIRTIO_NET_RSC_DEFAULT_INTERVAL),
17ec5a86
FK
3931 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3932 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
87108bb2 3933 TX_TIMER_INTERVAL),
17ec5a86
FK
3934 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3935 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
1c0fbfa3
MT
3936 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3937 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
9b02e161
WW
3938 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3939 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
a93e599d 3940 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
75ebec11
MC
3941 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3942 true),
9473939e
JB
3943 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3944 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
9711cd0d 3945 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
53da8b5a
YB
3946 DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
3947 VIRTIO_NET_F_GUEST_USO4, true),
3948 DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
3949 VIRTIO_NET_F_GUEST_USO6, true),
3950 DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
3951 VIRTIO_NET_F_HOST_USO, true),
17ec5a86
FK
3952 DEFINE_PROP_END_OF_LIST(),
3953};
3954
3955static void virtio_net_class_init(ObjectClass *klass, void *data)
3956{
3957 DeviceClass *dc = DEVICE_CLASS(klass);
3958 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
e6f746b3 3959
4f67d30b 3960 device_class_set_props(dc, virtio_net_properties);
290c2428 3961 dc->vmsd = &vmstate_virtio_net;
125ee0ed 3962 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
e6f746b3 3963 vdc->realize = virtio_net_device_realize;
306ec6c3 3964 vdc->unrealize = virtio_net_device_unrealize;
17ec5a86
FK
3965 vdc->get_config = virtio_net_get_config;
3966 vdc->set_config = virtio_net_set_config;
3967 vdc->get_features = virtio_net_get_features;
3968 vdc->set_features = virtio_net_set_features;
3969 vdc->bad_features = virtio_net_bad_features;
3970 vdc->reset = virtio_net_reset;
7dc6be52 3971 vdc->queue_reset = virtio_net_queue_reset;
7f863302 3972 vdc->queue_enable = virtio_net_queue_enable;
17ec5a86
FK
3973 vdc->set_status = virtio_net_set_status;
3974 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3975 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
2a083ffd 3976 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
7788c3f2 3977 vdc->post_load = virtio_net_post_load_virtio;
982b78c5 3978 vdc->vmsd = &vmstate_virtio_net_device;
9711cd0d 3979 vdc->primary_unplug_pending = primary_unplug_pending;
c255488d 3980 vdc->get_vhost = virtio_net_get_vhost;
cd9b8346 3981 vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
17ec5a86
FK
3982}
3983
3984static const TypeInfo virtio_net_info = {
3985 .name = TYPE_VIRTIO_NET,
3986 .parent = TYPE_VIRTIO_DEVICE,
3987 .instance_size = sizeof(VirtIONet),
3988 .instance_init = virtio_net_instance_init,
3989 .class_init = virtio_net_class_init,
3990};
3991
3992static void virtio_register_types(void)
3993{
3994 type_register_static(&virtio_net_info);
3995}
3996
3997type_init(virtio_register_types)