/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval.  This value affects performance
   a lot and should be tuned carefully; '300000' (300us) is the recommended
   value to pass the WHQL test, while '50000' can gain 2x netperf throughput
   with tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* temporary until the standard header includes it */
#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)

#define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
#define VIRTIO_NET_F_RSC_EXT       61

static inline __virtio16 *virtio_net_rsc_ext_num_packets(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_start;
}

static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_offset;
}

#endif

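/* Config space fields and the end offset of each, keyed by the feature bit
 * that makes the field meaningful; used to size the exposed config space. */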
static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = virtio_endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = virtio_endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = virtio_endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = virtio_endof(struct virtio_net_config, duplex)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

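/* Each queue pair uses two virtqueues (rx at even indexes, tx at odd ones),
 * so a virtqueue index maps to its queue pair by dividing by two. */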
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

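/* Propagate a status change to the vnet header endianness setup, to vhost,
 * and to every tx queue: flush or drop pending tx work and arm or cancel
 * the tx timer/bottom half depending on whether the queue is running. */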
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the tx
                 * queue and notification disabled. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

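/* Apply the rx-mode, MAC and VLAN filters programmed via the control
 * virtqueue; returns 1 if the packet should be delivered to the guest. */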
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    ssize_t r;

    rcu_read_lock();
    r = virtio_net_receive_rcu(nc, buf, size);
    rcu_read_unlock();
    return r;
}

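/* Locate the IPv4 and TCP headers of a received packet and record the
 * header lengths and TCP payload size used by the RSC coalescing code. */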
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between payload length in ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}

static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr *h;

    h = (struct virtio_net_hdr *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        *virtio_net_rsc_ext_num_packets(h) = seg->packets;
        *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}

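/* Handle a segment whose sequence number matches the cached one: classify
 * it as a duplicate ACK, a window update (merged into the cached segment)
 * or a pure ACK, and report whether the chain must be drained. */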
static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack, add dup ack count due to whql test up to 1 */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}

static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data; the payload length field differs between
           v4 and v6, so use it to update and record the new data length */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring the 'PUSH' bit along: the whql test guide says 'PUSH' can be
           coalesced for windows guests, while this may change the behavior
           for linux guests (only if they use the RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}

static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

/* Packets with 'SYN' bypass coalescing; packets with other control flags are
 * sent only after the chain is drained, to prevent out-of-order delivery */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    tcp_flag = htons(tcp->th_offset_flags) & 0x3F;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

/* Drain a connection's cached data; this avoids out-of-order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}

1759static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
1760 struct ip_header *ip,
1761 const uint8_t *buf, size_t size)
1762{
1763 uint16_t ip_len;
1764
1765 /* Not an ipv4 packet */
1766 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
1767 chain->stat.ip_option++;
1768 return RSC_BYPASS;
1769 }
1770
1771 /* Don't handle packets with ip option */
1772 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
1773 chain->stat.ip_option++;
1774 return RSC_BYPASS;
1775 }
1776
1777 if (ip->ip_p != IPPROTO_TCP) {
1778 chain->stat.bypass_not_tcp++;
1779 return RSC_BYPASS;
1780 }
1781
1782 /* Don't handle packets with ip fragment */
1783 if (!(htons(ip->ip_off) & IP_DF)) {
1784 chain->stat.ip_frag++;
1785 return RSC_BYPASS;
1786 }
1787
1788 /* Don't handle packets with ecn flag */
1789 if (IPTOS_ECN(ip->ip_tos)) {
1790 chain->stat.ip_ecn++;
1791 return RSC_BYPASS;
1792 }
1793
1794 ip_len = htons(ip->ip_len);
1795 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
1796 || ip_len > (size - chain->n->guest_hdr_len -
1797 sizeof(struct eth_header))) {
1798 chain->stat.ip_hacked++;
1799 return RSC_BYPASS;
1800 }
1801
1802 return RSC_CANDIDATE;
1803}
1804
1805static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
1806 NetClientState *nc,
1807 const uint8_t *buf, size_t size)
1808{
1809 int32_t ret;
1810 uint16_t hdr_len;
1811 VirtioNetRscUnit unit;
1812
1813 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1814
1815 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
1816 + sizeof(struct tcp_header))) {
1817 chain->stat.bypass_not_tcp++;
1818 return virtio_net_do_receive(nc, buf, size);
1819 }
1820
1821 virtio_net_rsc_extract_unit4(chain, buf, &unit);
1822 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
1823 != RSC_CANDIDATE) {
1824 return virtio_net_do_receive(nc, buf, size);
1825 }
1826
1827 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1828 if (ret == RSC_BYPASS) {
1829 return virtio_net_do_receive(nc, buf, size);
1830 } else if (ret == RSC_FINAL) {
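        /*
         * ip_start points at the IPv4 source address (offset 12 into the IP
         * header), so the 8-byte compare covers saddr + daddr, and tcp_port
         * is the start of the TCP header right behind the 20-byte IPv4
         * header (options were already rejected by the sanity check).
         */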
1831 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1832 ((hdr_len + sizeof(struct eth_header)) + 12),
1833 VIRTIO_NET_IP4_ADDR_SIZE,
1834 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
1835 }
1836
1837 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1838}
1839
1840static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
1841 struct ip6_header *ip6,
1842 const uint8_t *buf, size_t size)
1843{
1844 uint16_t ip_len;
1845
1846 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
1847 != IP_HEADER_VERSION_6) {
1848 return RSC_BYPASS;
1849 }
1850
1851    /* Both options and the protocol are checked by this test */
1852 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
1853 chain->stat.bypass_not_tcp++;
1854 return RSC_BYPASS;
1855 }
1856
1857 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1858 if (ip_len < sizeof(struct tcp_header) ||
1859 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
1860 - sizeof(struct ip6_header))) {
1861 chain->stat.ip_hacked++;
1862 return RSC_BYPASS;
1863 }
1864
1865 /* Don't handle packets with ecn flag */
1866 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
1867 chain->stat.ip_ecn++;
1868 return RSC_BYPASS;
1869 }
1870
1871 return RSC_CANDIDATE;
1872}
1873
1874static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
1875 const uint8_t *buf, size_t size)
1876{
1877 int32_t ret;
1878 uint16_t hdr_len;
1879 VirtioNetRscChain *chain;
1880 VirtioNetRscUnit unit;
1881
1882 chain = (VirtioNetRscChain *)opq;
1883 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1884
1885 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
1886 + sizeof(tcp_header))) {
1887 return virtio_net_do_receive(nc, buf, size);
1888 }
1889
1890 virtio_net_rsc_extract_unit6(chain, buf, &unit);
1891 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
1892 unit.ip, buf, size)) {
1893 return virtio_net_do_receive(nc, buf, size);
1894 }
1895
1896 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1897 if (ret == RSC_BYPASS) {
1898 return virtio_net_do_receive(nc, buf, size);
1899 } else if (ret == RSC_FINAL) {
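        /*
         * The IPv6 source address starts 8 bytes into the fixed header
         * (after version/class/flow, payload length, next header and hop
         * limit); the 32-byte compare covers saddr + daddr and tcp_port is
         * the TCP header right behind the 40-byte IPv6 header.
         */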
1900 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1901 ((hdr_len + sizeof(struct eth_header)) + 8),
1902 VIRTIO_NET_IP6_ADDR_SIZE,
1903 hdr_len + sizeof(struct eth_header)
1904 + sizeof(struct ip6_header));
1905 }
1906
1907 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1908}
1909
1910static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
1911 NetClientState *nc,
1912 uint16_t proto)
1913{
1914 VirtioNetRscChain *chain;
1915
1916 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
1917 return NULL;
1918 }
1919
1920 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
1921 if (chain->proto == proto) {
1922 return chain;
1923 }
1924 }
1925
1926 chain = g_malloc(sizeof(*chain));
1927 chain->n = n;
1928 chain->proto = proto;
1929 if (proto == (uint16_t)ETH_P_IP) {
1930 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
1931 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1932 } else {
1933 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
1934 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1935 }
1936 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
1937 virtio_net_rsc_purge, chain);
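    /* Armed in virtio_net_rsc_do_coalesce() when the first segment of a
     * chain is cached; it fires rsc_timeout ns later to purge whatever is
     * still pending in the chain. */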
1938 memset(&chain->stat, 0, sizeof(chain->stat));
1939
1940 QTAILQ_INIT(&chain->buffers);
1941 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
1942
1943 return chain;
1944}
1945
1946static ssize_t virtio_net_rsc_receive(NetClientState *nc,
1947 const uint8_t *buf,
1948 size_t size)
1949{
1950 uint16_t proto;
1951 VirtioNetRscChain *chain;
1952 struct eth_header *eth;
1953 VirtIONet *n;
1954
1955 n = qemu_get_nic_opaque(nc);
1956 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
1957 return virtio_net_do_receive(nc, buf, size);
1958 }
1959
1960 eth = (struct eth_header *)(buf + n->guest_hdr_len);
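    /* htons() performs the same 16-bit swap as ntohs(): it brings the
     * on-wire EtherType into host order for the ETH_P_IP/ETH_P_IPV6 check. */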
1961 proto = htons(eth->h_proto);
1962
1963 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
1964 if (chain) {
1965 chain->stat.received++;
1966 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
1967 return virtio_net_rsc_receive4(chain, nc, buf, size);
1968 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
1969 return virtio_net_rsc_receive6(chain, nc, buf, size);
1970 }
1971 }
1972 return virtio_net_do_receive(nc, buf, size);
1973}
1974
1975static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
1976 size_t size)
1977{
1978 VirtIONet *n = qemu_get_nic_opaque(nc);
1979 if ((n->rsc4_enabled || n->rsc6_enabled)) {
1980 return virtio_net_rsc_receive(nc, buf, size);
1981 } else {
1982 return virtio_net_do_receive(nc, buf, size);
1983 }
1984}
1985
0c87e93e 1986static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
6243375f 1987
4e68f7a0 1988static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
6243375f 1989{
cc1f0f45 1990 VirtIONet *n = qemu_get_nic_opaque(nc);
fed699f9 1991 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
17a0ca55 1992 VirtIODevice *vdev = VIRTIO_DEVICE(n);
6243375f 1993
51b19ebe 1994 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
17a0ca55 1995 virtio_notify(vdev, q->tx_vq);
6243375f 1996
51b19ebe
PB
1997 g_free(q->async_tx.elem);
1998 q->async_tx.elem = NULL;
6243375f 1999
0c87e93e
JW
2000 virtio_queue_set_notification(q->tx_vq, 1);
2001 virtio_net_flush_tx(q);
6243375f
MM
2002}
2003
fbe78f4f 2004/* TX */
0c87e93e 2005static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
fbe78f4f 2006{
0c87e93e 2007 VirtIONet *n = q->n;
17a0ca55 2008 VirtIODevice *vdev = VIRTIO_DEVICE(n);
51b19ebe 2009 VirtQueueElement *elem;
e3f30488 2010 int32_t num_packets = 0;
fed699f9 2011 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
17a0ca55 2012 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
e3f30488
AW
2013 return num_packets;
2014 }
fbe78f4f 2015
51b19ebe 2016 if (q->async_tx.elem) {
0c87e93e 2017 virtio_queue_set_notification(q->tx_vq, 0);
e3f30488 2018 return num_packets;
6243375f
MM
2019 }
2020
51b19ebe 2021 for (;;) {
bd89dd98 2022 ssize_t ret;
51b19ebe
PB
2023 unsigned int out_num;
2024 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
feb93f36 2025 struct virtio_net_hdr_mrg_rxbuf mhdr;
fbe78f4f 2026
51b19ebe
PB
2027 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2028 if (!elem) {
2029 break;
2030 }
2031
2032 out_num = elem->out_num;
2033 out_sg = elem->out_sg;
7b80d08e 2034 if (out_num < 1) {
fa5e56c2
GK
2035 virtio_error(vdev, "virtio-net header not in first element");
2036 virtqueue_detach_element(q->tx_vq, elem, 0);
2037 g_free(elem);
2038 return -EINVAL;
fbe78f4f
AL
2039 }
2040
032a74a1 2041 if (n->has_vnet_hdr) {
feb93f36
JW
2042 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2043 n->guest_hdr_len) {
fa5e56c2
GK
2044 virtio_error(vdev, "virtio-net header incorrect");
2045 virtqueue_detach_element(q->tx_vq, elem, 0);
2046 g_free(elem);
2047 return -EINVAL;
032a74a1 2048 }
1bfa316c 2049 if (n->needs_vnet_hdr_swap) {
feb93f36
JW
2050 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2051 sg2[0].iov_base = &mhdr;
2052 sg2[0].iov_len = n->guest_hdr_len;
2053 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2054 out_sg, out_num,
2055 n->guest_hdr_len, -1);
2056 if (out_num == VIRTQUEUE_MAX_SIZE) {
2057 goto drop;
7d37435b 2058 }
feb93f36
JW
2059 out_num += 1;
2060 out_sg = sg2;
7d37435b 2061 }
032a74a1 2062 }
14761f9c
MT
2063 /*
2064         * If the host wants to see the guest header as is, we can
2065         * pass it on unchanged. Otherwise, copy just the parts
2066         * that the host is interested in.
2067 */
2068 assert(n->host_hdr_len <= n->guest_hdr_len);
2069 if (n->host_hdr_len != n->guest_hdr_len) {
2070 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2071 out_sg, out_num,
2072 0, n->host_hdr_len);
2073 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2074 out_sg, out_num,
2075 n->guest_hdr_len, -1);
2076 out_num = sg_num;
2077 out_sg = sg;
fbe78f4f
AL
2078 }
2079
fed699f9
JW
2080 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2081 out_sg, out_num, virtio_net_tx_complete);
6243375f 2082 if (ret == 0) {
0c87e93e
JW
2083 virtio_queue_set_notification(q->tx_vq, 0);
2084 q->async_tx.elem = elem;
e3f30488 2085 return -EBUSY;
6243375f
MM
2086 }
2087
feb93f36 2088drop:
51b19ebe 2089 virtqueue_push(q->tx_vq, elem, 0);
17a0ca55 2090 virtio_notify(vdev, q->tx_vq);
51b19ebe 2091 g_free(elem);
e3f30488
AW
2092
2093 if (++num_packets >= n->tx_burst) {
2094 break;
2095 }
fbe78f4f 2096 }
e3f30488 2097 return num_packets;
fbe78f4f
AL
2098}
2099
a697a334 2100static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
fbe78f4f 2101{
17a0ca55 2102 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2103 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
fbe78f4f 2104
283e2c2a
YB
2105 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2106 virtio_net_drop_tx_queue_data(vdev, vq);
2107 return;
2108 }
2109
783e7706 2110 /* This happens when device was stopped but VCPU wasn't. */
17a0ca55 2111 if (!vdev->vm_running) {
0c87e93e 2112 q->tx_waiting = 1;
783e7706
MT
2113 return;
2114 }
2115
0c87e93e 2116 if (q->tx_waiting) {
fbe78f4f 2117 virtio_queue_set_notification(vq, 1);
bc72ad67 2118 timer_del(q->tx_timer);
0c87e93e 2119 q->tx_waiting = 0;
fa5e56c2
GK
2120 if (virtio_net_flush_tx(q) == -EINVAL) {
2121 return;
2122 }
fbe78f4f 2123 } else {
bc72ad67
AB
2124 timer_mod(q->tx_timer,
2125 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
0c87e93e 2126 q->tx_waiting = 1;
fbe78f4f
AL
2127 virtio_queue_set_notification(vq, 0);
2128 }
2129}
2130
a697a334
AW
2131static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2132{
17a0ca55 2133 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2134 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
a697a334 2135
283e2c2a
YB
2136 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2137 virtio_net_drop_tx_queue_data(vdev, vq);
2138 return;
2139 }
2140
0c87e93e 2141 if (unlikely(q->tx_waiting)) {
a697a334
AW
2142 return;
2143 }
0c87e93e 2144 q->tx_waiting = 1;
783e7706 2145 /* This happens when device was stopped but VCPU wasn't. */
17a0ca55 2146 if (!vdev->vm_running) {
783e7706
MT
2147 return;
2148 }
a697a334 2149 virtio_queue_set_notification(vq, 0);
0c87e93e 2150 qemu_bh_schedule(q->tx_bh);
a697a334
AW
2151}
2152
fbe78f4f
AL
2153static void virtio_net_tx_timer(void *opaque)
2154{
0c87e93e
JW
2155 VirtIONetQueue *q = opaque;
2156 VirtIONet *n = q->n;
17a0ca55 2157 VirtIODevice *vdev = VIRTIO_DEVICE(n);
e8bcf842
MT
2158 /* This happens when device was stopped but BH wasn't. */
2159 if (!vdev->vm_running) {
2160 /* Make sure tx waiting is set, so we'll run when restarted. */
2161 assert(q->tx_waiting);
2162 return;
2163 }
fbe78f4f 2164
0c87e93e 2165 q->tx_waiting = 0;
fbe78f4f
AL
2166
2167     /* Just in case the driver is not ready any more */
17a0ca55 2168 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
fbe78f4f 2169 return;
17a0ca55 2170 }
fbe78f4f 2171
0c87e93e
JW
2172 virtio_queue_set_notification(q->tx_vq, 1);
2173 virtio_net_flush_tx(q);
fbe78f4f
AL
2174}
2175
a697a334
AW
2176static void virtio_net_tx_bh(void *opaque)
2177{
0c87e93e
JW
2178 VirtIONetQueue *q = opaque;
2179 VirtIONet *n = q->n;
17a0ca55 2180 VirtIODevice *vdev = VIRTIO_DEVICE(n);
a697a334
AW
2181 int32_t ret;
2182
e8bcf842
MT
2183 /* This happens when device was stopped but BH wasn't. */
2184 if (!vdev->vm_running) {
2185 /* Make sure tx waiting is set, so we'll run when restarted. */
2186 assert(q->tx_waiting);
2187 return;
2188 }
783e7706 2189
0c87e93e 2190 q->tx_waiting = 0;
a697a334
AW
2191
2192     /* Just in case the driver is not ready any more */
17a0ca55 2193 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
a697a334 2194 return;
17a0ca55 2195 }
a697a334 2196
0c87e93e 2197 ret = virtio_net_flush_tx(q);
fa5e56c2
GK
2198 if (ret == -EBUSY || ret == -EINVAL) {
2199 return; /* Notification re-enable handled by tx_complete or device
2200 * broken */
a697a334
AW
2201 }
2202
2203 /* If we flush a full burst of packets, assume there are
2204 * more coming and immediately reschedule */
2205 if (ret >= n->tx_burst) {
0c87e93e
JW
2206 qemu_bh_schedule(q->tx_bh);
2207 q->tx_waiting = 1;
a697a334
AW
2208 return;
2209 }
2210
2211 /* If less than a full burst, re-enable notification and flush
2212 * anything that may have come in while we weren't looking. If
2213 * we find something, assume the guest is still active and reschedule */
0c87e93e 2214 virtio_queue_set_notification(q->tx_vq, 1);
fa5e56c2
GK
2215 ret = virtio_net_flush_tx(q);
2216 if (ret == -EINVAL) {
2217 return;
2218 } else if (ret > 0) {
0c87e93e
JW
2219 virtio_queue_set_notification(q->tx_vq, 0);
2220 qemu_bh_schedule(q->tx_bh);
2221 q->tx_waiting = 1;
a697a334
AW
2222 }
2223}
2224
f9d6dbf0
WC
2225static void virtio_net_add_queue(VirtIONet *n, int index)
2226{
2227 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2228
1c0fbfa3
MT
2229 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2230 virtio_net_handle_rx);
9b02e161 2231
f9d6dbf0
WC
2232 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2233 n->vqs[index].tx_vq =
9b02e161
WW
2234 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2235 virtio_net_handle_tx_timer);
f9d6dbf0
WC
2236 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2237 virtio_net_tx_timer,
2238 &n->vqs[index]);
2239 } else {
2240 n->vqs[index].tx_vq =
9b02e161
WW
2241 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2242 virtio_net_handle_tx_bh);
f9d6dbf0
WC
2243 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2244 }
2245
2246 n->vqs[index].tx_waiting = 0;
2247 n->vqs[index].n = n;
2248}
2249
2250static void virtio_net_del_queue(VirtIONet *n, int index)
2251{
2252 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2253 VirtIONetQueue *q = &n->vqs[index];
2254 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2255
2256 qemu_purge_queued_packets(nc);
2257
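    /* Virtqueues are laid out as rx0, tx0, rx1, tx1, ..., ctrl; the RX queue
     * of pair 'index' is 2 * index and its TX queue is 2 * index + 1. */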
2258 virtio_del_queue(vdev, index * 2);
2259 if (q->tx_timer) {
2260 timer_del(q->tx_timer);
2261 timer_free(q->tx_timer);
f989c30c 2262 q->tx_timer = NULL;
f9d6dbf0
WC
2263 } else {
2264 qemu_bh_delete(q->tx_bh);
f989c30c 2265 q->tx_bh = NULL;
f9d6dbf0 2266 }
f989c30c 2267 q->tx_waiting = 0;
f9d6dbf0
WC
2268 virtio_del_queue(vdev, index * 2 + 1);
2269}
2270
2271static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2272{
2273 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2274 int old_num_queues = virtio_get_num_queues(vdev);
2275 int new_num_queues = new_max_queues * 2 + 1;
2276 int i;
2277
2278 assert(old_num_queues >= 3);
2279 assert(old_num_queues % 2 == 1);
2280
2281 if (old_num_queues == new_num_queues) {
2282 return;
2283 }
2284
2285 /*
2286 * We always need to remove and add ctrl vq if
2287 * old_num_queues != new_num_queues. Remove ctrl_vq first,
20f86a75 2288 * and then we only enter one of the following two loops.
f9d6dbf0
WC
2289 */
2290 virtio_del_queue(vdev, old_num_queues - 1);
2291
2292 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2293 /* new_num_queues < old_num_queues */
2294 virtio_net_del_queue(n, i / 2);
2295 }
2296
2297 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2298 /* new_num_queues > old_num_queues */
2299 virtio_net_add_queue(n, i / 2);
2300 }
2301
2302 /* add ctrl_vq last */
2303 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2304}
2305
ec57db16 2306static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
fed699f9 2307{
f9d6dbf0
WC
2308 int max = multiqueue ? n->max_queues : 1;
2309
fed699f9 2310 n->multiqueue = multiqueue;
f9d6dbf0 2311 virtio_net_change_num_queues(n, max);
fed699f9 2312
fed699f9
JW
2313 virtio_net_set_queues(n);
2314}
2315
982b78c5 2316static int virtio_net_post_load_device(void *opaque, int version_id)
037dab2f 2317{
982b78c5
DDAG
2318 VirtIONet *n = opaque;
2319 VirtIODevice *vdev = VIRTIO_DEVICE(n);
037dab2f 2320 int i, link_down;
fbe78f4f 2321
9d8c6a25 2322 trace_virtio_net_post_load_device();
982b78c5 2323 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
95129d6f
CH
2324 virtio_vdev_has_feature(vdev,
2325 VIRTIO_F_VERSION_1));
fbe78f4f 2326
76010cb3 2327 /* MAC_TABLE_ENTRIES may be different from the saved image */
982b78c5 2328 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
76010cb3 2329 n->mac_table.in_use = 0;
b6503ed9 2330 }
0ce0e8f4 2331
982b78c5 2332 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
6c666823
MT
2333 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2334 }
2335
7788c3f2
MS
2336 /*
2337 * curr_guest_offloads will be later overwritten by the
2338 * virtio_set_features_nocheck call done from the virtio_load.
2339 * Here we make sure it is preserved and restored accordingly
2340 * in the virtio_net_post_load_virtio callback.
2341 */
2342 n->saved_guest_offloads = n->curr_guest_offloads;
6c666823 2343
5f800801
JW
2344 virtio_net_set_queues(n);
2345
2d9aba39
AW
2346 /* Find the first multicast entry in the saved MAC filter */
2347 for (i = 0; i < n->mac_table.in_use; i++) {
2348 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2349 break;
2350 }
2351 }
2352 n->mac_table.first_multi = i;
98991481
AK
2353
2354 /* nc.link_down can't be migrated, so infer link_down according
2355      * to the link status bit in n->status */
5f800801
JW
2356 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2357 for (i = 0; i < n->max_queues; i++) {
2358 qemu_get_subqueue(n->nic, i)->link_down = link_down;
2359 }
98991481 2360
6c666823
MT
2361 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2362 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
9d8c6a25
DDAG
2363 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2364 QEMU_CLOCK_VIRTUAL,
2365 virtio_net_announce_timer, n);
2366 if (n->announce_timer.round) {
2367 timer_mod(n->announce_timer.tm,
2368 qemu_clock_get_ms(n->announce_timer.type));
2369 } else {
944458b6 2370 qemu_announce_timer_del(&n->announce_timer, false);
9d8c6a25 2371 }
6c666823
MT
2372 }
2373
fbe78f4f
AL
2374 return 0;
2375}
2376
7788c3f2
MS
2377static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2378{
2379 VirtIONet *n = VIRTIO_NET(vdev);
2380 /*
2381 * The actual needed state is now in saved_guest_offloads,
2382 * see virtio_net_post_load_device for detail.
2383 * Restore it back and apply the desired offloads.
2384 */
2385 n->curr_guest_offloads = n->saved_guest_offloads;
2386 if (peer_has_vnet_hdr(n)) {
2387 virtio_net_apply_guest_offloads(n);
2388 }
2389
2390 return 0;
2391}
2392
982b78c5
DDAG
2393/* tx_waiting field of a VirtIONetQueue */
2394static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2395 .name = "virtio-net-queue-tx_waiting",
2396 .fields = (VMStateField[]) {
2397 VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2398 VMSTATE_END_OF_LIST()
2399 },
2400};
2401
2402static bool max_queues_gt_1(void *opaque, int version_id)
2403{
2404 return VIRTIO_NET(opaque)->max_queues > 1;
2405}
2406
2407static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2408{
2409 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2410 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2411}
2412
2413static bool mac_table_fits(void *opaque, int version_id)
2414{
2415 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2416}
2417
2418static bool mac_table_doesnt_fit(void *opaque, int version_id)
2419{
2420 return !mac_table_fits(opaque, version_id);
2421}
2422
2423/* This temporary type is shared by all the WITH_TMP methods
2424 * although only some fields are used by each.
2425 */
2426struct VirtIONetMigTmp {
2427 VirtIONet *parent;
2428 VirtIONetQueue *vqs_1;
2429 uint16_t curr_queues_1;
2430 uint8_t has_ufo;
2431 uint32_t has_vnet_hdr;
2432};
2433
2434/* The 2nd and subsequent tx_waiting flags are loaded later than
2435 * the 1st entry in the queues and only if there's more than one
2436 * entry. We use the tmp mechanism to calculate a temporary
2437 * pointer and count and also validate the count.
2438 */
2439
44b1ff31 2440static int virtio_net_tx_waiting_pre_save(void *opaque)
982b78c5
DDAG
2441{
2442 struct VirtIONetMigTmp *tmp = opaque;
2443
2444 tmp->vqs_1 = tmp->parent->vqs + 1;
2445 tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
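    /* e.g. with curr_queues == 4 this exposes vqs[1..3] to the varray below;
     * vqs[0].tx_waiting is migrated through the main device vmstate. */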
2446 if (tmp->parent->curr_queues == 0) {
2447 tmp->curr_queues_1 = 0;
2448 }
44b1ff31
DDAG
2449
2450 return 0;
982b78c5
DDAG
2451}
2452
2453static int virtio_net_tx_waiting_pre_load(void *opaque)
2454{
2455 struct VirtIONetMigTmp *tmp = opaque;
2456
2457 /* Reuse the pointer setup from save */
2458 virtio_net_tx_waiting_pre_save(opaque);
2459
2460 if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2461 error_report("virtio-net: curr_queues %x > max_queues %x",
2462 tmp->parent->curr_queues, tmp->parent->max_queues);
2463
2464 return -EINVAL;
2465 }
2466
2467 return 0; /* all good */
2468}
2469
2470static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2471 .name = "virtio-net-tx_waiting",
2472 .pre_load = virtio_net_tx_waiting_pre_load,
2473 .pre_save = virtio_net_tx_waiting_pre_save,
2474 .fields = (VMStateField[]) {
2475 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2476 curr_queues_1,
2477 vmstate_virtio_net_queue_tx_waiting,
2478 struct VirtIONetQueue),
2479 VMSTATE_END_OF_LIST()
2480 },
2481};
2482
2483/* the 'has_ufo' flag is just tested; if the incoming stream has the
2484 * flag set we need to check that we have it
2485 */
2486static int virtio_net_ufo_post_load(void *opaque, int version_id)
2487{
2488 struct VirtIONetMigTmp *tmp = opaque;
2489
2490 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2491 error_report("virtio-net: saved image requires TUN_F_UFO support");
2492 return -EINVAL;
2493 }
2494
2495 return 0;
2496}
2497
44b1ff31 2498static int virtio_net_ufo_pre_save(void *opaque)
982b78c5
DDAG
2499{
2500 struct VirtIONetMigTmp *tmp = opaque;
2501
2502 tmp->has_ufo = tmp->parent->has_ufo;
44b1ff31
DDAG
2503
2504 return 0;
982b78c5
DDAG
2505}
2506
2507static const VMStateDescription vmstate_virtio_net_has_ufo = {
2508 .name = "virtio-net-ufo",
2509 .post_load = virtio_net_ufo_post_load,
2510 .pre_save = virtio_net_ufo_pre_save,
2511 .fields = (VMStateField[]) {
2512 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2513 VMSTATE_END_OF_LIST()
2514 },
2515};
2516
2517/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2518 * flag set we need to check that we have it
2519 */
2520static int virtio_net_vnet_post_load(void *opaque, int version_id)
2521{
2522 struct VirtIONetMigTmp *tmp = opaque;
2523
2524 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2525 error_report("virtio-net: saved image requires vnet_hdr=on");
2526 return -EINVAL;
2527 }
2528
2529 return 0;
2530}
2531
44b1ff31 2532static int virtio_net_vnet_pre_save(void *opaque)
982b78c5
DDAG
2533{
2534 struct VirtIONetMigTmp *tmp = opaque;
2535
2536 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
44b1ff31
DDAG
2537
2538 return 0;
982b78c5
DDAG
2539}
2540
2541static const VMStateDescription vmstate_virtio_net_has_vnet = {
2542 .name = "virtio-net-vnet",
2543 .post_load = virtio_net_vnet_post_load,
2544 .pre_save = virtio_net_vnet_pre_save,
2545 .fields = (VMStateField[]) {
2546 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2547 VMSTATE_END_OF_LIST()
2548 },
2549};
2550
2551static const VMStateDescription vmstate_virtio_net_device = {
2552 .name = "virtio-net-device",
2553 .version_id = VIRTIO_NET_VM_VERSION,
2554 .minimum_version_id = VIRTIO_NET_VM_VERSION,
2555 .post_load = virtio_net_post_load_device,
2556 .fields = (VMStateField[]) {
2557 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
2558 VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
2559 vmstate_virtio_net_queue_tx_waiting,
2560 VirtIONetQueue),
2561 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
2562 VMSTATE_UINT16(status, VirtIONet),
2563 VMSTATE_UINT8(promisc, VirtIONet),
2564 VMSTATE_UINT8(allmulti, VirtIONet),
2565 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
2566
2567 /* Guarded pair: If it fits we load it, else we throw it away
2568     * - can happen if the source has a larger MAC table; post-load
2569 * sets flags in this case.
2570 */
2571 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
2572 0, mac_table_fits, mac_table.in_use,
2573 ETH_ALEN),
2574 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
2575 mac_table.in_use, ETH_ALEN),
2576
2577 /* Note: This is an array of uint32's that's always been saved as a
2578 * buffer; hold onto your endiannesses; it's actually used as a bitmap
2579     * but built from uint32 values.
2580 */
2581 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
2582 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2583 vmstate_virtio_net_has_vnet),
2584 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
2585 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
2586 VMSTATE_UINT8(alluni, VirtIONet),
2587 VMSTATE_UINT8(nomulti, VirtIONet),
2588 VMSTATE_UINT8(nouni, VirtIONet),
2589 VMSTATE_UINT8(nobcast, VirtIONet),
2590 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2591 vmstate_virtio_net_has_ufo),
2592 VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
2593 vmstate_info_uint16_equal, uint16_t),
2594 VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
2595 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2596 vmstate_virtio_net_tx_waiting),
2597 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
2598 has_ctrl_guest_offloads),
2599 VMSTATE_END_OF_LIST()
2600 },
2601};
2602
eb6b6c12 2603static NetClientInfo net_virtio_info = {
f394b2e2 2604 .type = NET_CLIENT_DRIVER_NIC,
eb6b6c12
MM
2605 .size = sizeof(NICState),
2606 .can_receive = virtio_net_can_receive,
2607 .receive = virtio_net_receive,
eb6b6c12 2608 .link_status_changed = virtio_net_set_link_status,
b1be4280 2609 .query_rx_filter = virtio_net_query_rxfilter,
b2c929f0 2610 .announce = virtio_net_announce,
eb6b6c12
MM
2611};
2612
f56a1247
MT
2613static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
2614{
17a0ca55 2615 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2616 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
f56a1247 2617 assert(n->vhost_started);
ed8b4afe 2618 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
f56a1247
MT
2619}
2620
2621static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
2622 bool mask)
2623{
17a0ca55 2624 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2625 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
f56a1247 2626 assert(n->vhost_started);
ed8b4afe 2627 vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
f56a1247
MT
2628 vdev, idx, mask);
2629}
2630
019a3edb 2631static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
fbe78f4f 2632{
0cd09c3a 2633 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
a93e599d 2634
ba550851
SG
2635 n->config_size = virtio_feature_get_config_size(feature_sizes,
2636 host_features);
17ec5a86
FK
2637}
2638
8a253ec2
FK
2639void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
2640 const char *type)
2641{
2642 /*
2643     * The name can be NULL; in that case the netclient name will be type.x.
2644 */
2645 assert(type != NULL);
2646
9e288406 2647 g_free(n->netclient_name);
9e288406 2648 g_free(n->netclient_type);
80e0090a 2649 n->netclient_name = g_strdup(name);
8a253ec2
FK
2650 n->netclient_type = g_strdup(type);
2651}
2652
e6f746b3 2653static void virtio_net_device_realize(DeviceState *dev, Error **errp)
17ec5a86 2654{
e6f746b3 2655 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
284a32f0 2656 VirtIONet *n = VIRTIO_NET(dev);
b1be4280 2657 NetClientState *nc;
284a32f0 2658 int i;
1773d9ee 2659
a93e599d 2660 if (n->net_conf.mtu) {
127833ee 2661 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
a93e599d
MC
2662 }
2663
9473939e
JB
2664 if (n->net_conf.duplex_str) {
2665 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
2666 n->net_conf.duplex = DUPLEX_HALF;
2667 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
2668 n->net_conf.duplex = DUPLEX_FULL;
2669 } else {
2670 error_setg(errp, "'duplex' must be 'half' or 'full'");
2671 }
2672 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2673 } else {
2674 n->net_conf.duplex = DUPLEX_UNKNOWN;
2675 }
2676
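    /* SPEED_UNKNOWN is -1 (ethtool.h), so anything below it is invalid and
     * any non-negative speed advertises VIRTIO_NET_F_SPEED_DUPLEX. */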
2677 if (n->net_conf.speed < SPEED_UNKNOWN) {
2678 error_setg(errp, "'speed' must be between 0 and INT_MAX");
2679 } else if (n->net_conf.speed >= 0) {
2680 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2681 }
2682
da3e8a23 2683 virtio_net_set_config_size(n, n->host_features);
284a32f0 2684 virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
fbe78f4f 2685
1c0fbfa3
MT
2686 /*
2687 * We set a lower limit on RX queue size to what it always was.
2688 * Guests that want a smaller ring can always resize it without
2689 * help from us (using virtio 1 and up).
2690 */
2691 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
2692 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
5f997fd1 2693 !is_power_of_2(n->net_conf.rx_queue_size)) {
1c0fbfa3
MT
2694 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
2695 "must be a power of 2 between %d and %d.",
2696 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
2697 VIRTQUEUE_MAX_SIZE);
2698 virtio_cleanup(vdev);
2699 return;
2700 }
2701
9b02e161
WW
2702 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
2703 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
2704 !is_power_of_2(n->net_conf.tx_queue_size)) {
2705 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
2706 "must be a power of 2 between %d and %d",
2707 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
2708 VIRTQUEUE_MAX_SIZE);
2709 virtio_cleanup(vdev);
2710 return;
2711 }
2712
575a1c0e 2713 n->max_queues = MAX(n->nic_conf.peers.queues, 1);
87b3bd1c 2714 if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
7e0e736e 2715 error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
631b22ea 2716 "must be a positive integer less than %d.",
87b3bd1c 2717 n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
7e0e736e
JW
2718 virtio_cleanup(vdev);
2719 return;
2720 }
f6b26cf2 2721 n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
fed699f9 2722 n->curr_queues = 1;
1773d9ee 2723 n->tx_timeout = n->net_conf.txtimer;
a697a334 2724
1773d9ee
FK
2725 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
2726 && strcmp(n->net_conf.tx, "bh")) {
0765691e
MA
2727 warn_report("virtio-net: "
2728 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
2729 n->net_conf.tx);
2730 error_printf("Defaulting to \"bh\"");
a697a334
AW
2731 }
2732
2eef278b
MT
2733 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
2734 n->net_conf.tx_queue_size);
9b02e161 2735
da51a335 2736 for (i = 0; i < n->max_queues; i++) {
f9d6dbf0 2737 virtio_net_add_queue(n, i);
a697a334 2738 }
da51a335 2739
17a0ca55 2740 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
1773d9ee
FK
2741 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
2742 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
554c97dd 2743 n->status = VIRTIO_NET_S_LINK_UP;
9d8c6a25
DDAG
2744 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2745 QEMU_CLOCK_VIRTUAL,
2746 virtio_net_announce_timer, n);
b2c929f0 2747 n->announce_timer.round = 0;
fbe78f4f 2748
8a253ec2
FK
2749 if (n->netclient_type) {
2750 /*
2751     * This happens when virtio_net_set_netclient_name has been called.
2752 */
2753 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2754 n->netclient_type, n->netclient_name, n);
2755 } else {
2756 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
284a32f0 2757 object_get_typename(OBJECT(dev)), dev->id, n);
8a253ec2
FK
2758 }
2759
6e371ab8
MT
2760 peer_test_vnet_hdr(n);
2761 if (peer_has_vnet_hdr(n)) {
fed699f9 2762 for (i = 0; i < n->max_queues; i++) {
d6085e3a 2763 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
fed699f9 2764 }
6e371ab8
MT
2765 n->host_hdr_len = sizeof(struct virtio_net_hdr);
2766 } else {
2767 n->host_hdr_len = 0;
2768 }
eb6b6c12 2769
1773d9ee 2770 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
96d5e201 2771
fed699f9 2772 n->vqs[0].tx_waiting = 0;
1773d9ee 2773 n->tx_burst = n->net_conf.txburst;
bb9d17f8 2774 virtio_net_set_mrg_rx_bufs(n, 0, 0);
002437cd 2775 n->promisc = 1; /* for compatibility */
fbe78f4f 2776
7267c094 2777 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
b6503ed9 2778
7267c094 2779 n->vlans = g_malloc0(MAX_VLAN >> 3);
f21c0ed9 2780
b1be4280
AK
2781 nc = qemu_get_queue(n->nic);
2782 nc->rxfilter_notify_enabled = 1;
2783
2974e916 2784 QTAILQ_INIT(&n->rsc_chains);
284a32f0 2785 n->qdev = dev;
17ec5a86
FK
2786}
2787
306ec6c3 2788static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
17ec5a86 2789{
306ec6c3
AF
2790 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2791 VirtIONet *n = VIRTIO_NET(dev);
f9d6dbf0 2792 int i, max_queues;
17ec5a86
FK
2793
2794 /* This will stop vhost backend if appropriate. */
2795 virtio_net_set_status(vdev, 0);
2796
9e288406
MA
2797 g_free(n->netclient_name);
2798 n->netclient_name = NULL;
2799 g_free(n->netclient_type);
2800 n->netclient_type = NULL;
8a253ec2 2801
17ec5a86
FK
2802 g_free(n->mac_table.macs);
2803 g_free(n->vlans);
2804
f9d6dbf0
WC
2805 max_queues = n->multiqueue ? n->max_queues : 1;
2806 for (i = 0; i < max_queues; i++) {
2807 virtio_net_del_queue(n, i);
17ec5a86
FK
2808 }
2809
944458b6 2810 qemu_announce_timer_del(&n->announce_timer, false);
17ec5a86
FK
2811 g_free(n->vqs);
2812 qemu_del_nic(n->nic);
2974e916 2813 virtio_net_rsc_cleanup(n);
6a1a8cc7 2814 virtio_cleanup(vdev);
17ec5a86
FK
2815}
2816
2817static void virtio_net_instance_init(Object *obj)
2818{
2819 VirtIONet *n = VIRTIO_NET(obj);
2820
2821 /*
2822 * The default config_size is sizeof(struct virtio_net_config).
2823     * Can be overridden with virtio_net_set_config_size.
2824 */
2825 n->config_size = sizeof(struct virtio_net_config);
aa4197c3
GA
2826 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
2827 "bootindex", "/ethernet-phy@0",
2828 DEVICE(n), NULL);
17ec5a86
FK
2829}
2830
44b1ff31 2831static int virtio_net_pre_save(void *opaque)
4d45dcfb
HP
2832{
2833 VirtIONet *n = opaque;
2834
2835 /* At this point, backend must be stopped, otherwise
2836 * it might keep writing to memory. */
2837 assert(!n->vhost_started);
44b1ff31
DDAG
2838
2839 return 0;
4d45dcfb
HP
2840}
2841
2842static const VMStateDescription vmstate_virtio_net = {
2843 .name = "virtio-net",
2844 .minimum_version_id = VIRTIO_NET_VM_VERSION,
2845 .version_id = VIRTIO_NET_VM_VERSION,
2846 .fields = (VMStateField[]) {
2847 VMSTATE_VIRTIO_DEVICE,
2848 VMSTATE_END_OF_LIST()
2849 },
2850 .pre_save = virtio_net_pre_save,
2851};
290c2428 2852
17ec5a86 2853static Property virtio_net_properties[] = {
127833ee
JB
2854 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
2855 VIRTIO_NET_F_CSUM, true),
2856 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
87108bb2 2857 VIRTIO_NET_F_GUEST_CSUM, true),
127833ee
JB
2858 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
2859 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
87108bb2 2860 VIRTIO_NET_F_GUEST_TSO4, true),
127833ee 2861 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
87108bb2 2862 VIRTIO_NET_F_GUEST_TSO6, true),
127833ee 2863 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
87108bb2 2864 VIRTIO_NET_F_GUEST_ECN, true),
127833ee 2865 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
87108bb2 2866 VIRTIO_NET_F_GUEST_UFO, true),
127833ee 2867 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
87108bb2 2868 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
127833ee 2869 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
87108bb2 2870 VIRTIO_NET_F_HOST_TSO4, true),
127833ee 2871 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
87108bb2 2872 VIRTIO_NET_F_HOST_TSO6, true),
127833ee 2873 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
87108bb2 2874 VIRTIO_NET_F_HOST_ECN, true),
127833ee 2875 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
87108bb2 2876 VIRTIO_NET_F_HOST_UFO, true),
127833ee 2877 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
87108bb2 2878 VIRTIO_NET_F_MRG_RXBUF, true),
127833ee 2879 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
87108bb2 2880 VIRTIO_NET_F_STATUS, true),
127833ee 2881 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
87108bb2 2882 VIRTIO_NET_F_CTRL_VQ, true),
127833ee 2883 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
87108bb2 2884 VIRTIO_NET_F_CTRL_RX, true),
127833ee 2885 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
87108bb2 2886 VIRTIO_NET_F_CTRL_VLAN, true),
127833ee 2887 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
87108bb2 2888 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
127833ee 2889 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
87108bb2 2890 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
127833ee 2891 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
87108bb2 2892 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
127833ee 2893 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
2974e916
YB
2894 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
2895 VIRTIO_NET_F_RSC_EXT, false),
2896 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
2897 VIRTIO_NET_RSC_DEFAULT_INTERVAL),
17ec5a86
FK
2898 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
2899 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
87108bb2 2900 TX_TIMER_INTERVAL),
17ec5a86
FK
2901 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
2902 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
1c0fbfa3
MT
2903 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
2904 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
9b02e161
WW
2905 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
2906 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
a93e599d 2907 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
75ebec11
MC
2908 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
2909 true),
9473939e
JB
2910 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
2911 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
17ec5a86
FK
2912 DEFINE_PROP_END_OF_LIST(),
2913};
2914
2915static void virtio_net_class_init(ObjectClass *klass, void *data)
2916{
2917 DeviceClass *dc = DEVICE_CLASS(klass);
2918 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
e6f746b3 2919
17ec5a86 2920 dc->props = virtio_net_properties;
290c2428 2921 dc->vmsd = &vmstate_virtio_net;
125ee0ed 2922 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
e6f746b3 2923 vdc->realize = virtio_net_device_realize;
306ec6c3 2924 vdc->unrealize = virtio_net_device_unrealize;
17ec5a86
FK
2925 vdc->get_config = virtio_net_get_config;
2926 vdc->set_config = virtio_net_set_config;
2927 vdc->get_features = virtio_net_get_features;
2928 vdc->set_features = virtio_net_set_features;
2929 vdc->bad_features = virtio_net_bad_features;
2930 vdc->reset = virtio_net_reset;
2931 vdc->set_status = virtio_net_set_status;
2932 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
2933 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
2a083ffd 2934 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
7788c3f2 2935 vdc->post_load = virtio_net_post_load_virtio;
982b78c5 2936 vdc->vmsd = &vmstate_virtio_net_device;
17ec5a86
FK
2937}
2938
2939static const TypeInfo virtio_net_info = {
2940 .name = TYPE_VIRTIO_NET,
2941 .parent = TYPE_VIRTIO_DEVICE,
2942 .instance_size = sizeof(VirtIONet),
2943 .instance_init = virtio_net_instance_init,
2944 .class_init = virtio_net_class_init,
2945};
2946
2947static void virtio_register_types(void)
2948{
2949 type_register_static(&virtio_net_info);
2950}
2951
2952type_init(virtio_register_types)