[mirror_qemu.git] / hw / net / virtio-net.c
fbe78f4f
AL
1/*
2 * Virtio Network Device
3 *
4 * Copyright IBM, Corp. 2007
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 */
13
9b8bfe21 14#include "qemu/osdep.h"
1de7afc9 15#include "qemu/iov.h"
db725815 16#include "qemu/main-loop.h"
0b8fa32f 17#include "qemu/module.h"
0d09e41a 18#include "hw/virtio/virtio.h"
1422e32d 19#include "net/net.h"
7200ac3c 20#include "net/checksum.h"
a8ed73f7 21#include "net/tap.h"
1de7afc9
PB
22#include "qemu/error-report.h"
23#include "qemu/timer.h"
0d09e41a
PB
24#include "hw/virtio/virtio-net.h"
25#include "net/vhost_net.h"
9d8c6a25 26#include "net/announce.h"
17ec5a86 27#include "hw/virtio/virtio-bus.h"
e688df6b 28#include "qapi/error.h"
9af23989 29#include "qapi/qapi-events-net.h"
a27bd6c7 30#include "hw/qdev-properties.h"
1399c60d 31#include "hw/virtio/virtio-access.h"
f8d806c9 32#include "migration/misc.h"
9473939e 33#include "standard-headers/linux/ethtool.h"
9d8c6a25 34#include "trace.h"
fbe78f4f 35
0ce0e8f4 36#define VIRTIO_NET_VM_VERSION 11
b6503ed9 37
4ffb17f5 38#define MAC_TABLE_ENTRIES 64
f21c0ed9 39#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
9d6271b8 40
1c0fbfa3
MT
41/* previously fixed value */
42#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
9b02e161
WW
43#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
44
1c0fbfa3
MT
45/* for now, only allow larger queues; with virtio-1, guest can downsize */
46#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
9b02e161 47#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
1c0fbfa3 48
2974e916
YB
49#define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */
50
51#define VIRTIO_NET_TCP_FLAG 0x3F
52#define VIRTIO_NET_TCP_HDR_LENGTH 0xF000
53
54/* IPv4 max payload, 16 bits in the header */
55#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
56#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
57
58/* IPv4 header length field value (in 32-bit words) when no options are present */
59#define VIRTIO_NET_IP4_HEADER_LENGTH 5
60
61#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
62#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
63
64/* Purge coalesced packets timer interval. This value affects performance
65 a lot and should be tuned carefully; '300000' (300us) is the recommended
66 value to pass the WHQL test, while '50000' can gain 2x netperf throughput
67 with tso/gso/gro 'off'. */
68#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
69
70/* temporary until the standard header includes it */
71#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)
72
73#define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc_ext data in csum_ fields */
d47e5e31 74#define VIRTIO_NET_F_RSC_EXT 61
2974e916
YB
75
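/* Note: with the provisional VIRTIO_NET_HDR_F_RSC_INFO definitions above, the
 * otherwise unused csum_start/csum_offset fields of the vnet header are reused
 * to report how many packets and duplicated ACKs were coalesced into a receive
 * segment; the two accessors below simply alias those fields. */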
76static inline __virtio16 *virtio_net_rsc_ext_num_packets(
77 struct virtio_net_hdr *hdr)
78{
79 return &hdr->csum_start;
80}
81
82static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
83 struct virtio_net_hdr *hdr)
84{
85 return &hdr->csum_offset;
86}
87
88#endif
89
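/* Each entry maps a feature bit to the end offset of the last config-space
 * field that feature requires; the table is used to derive n->config_size,
 * which bounds how many bytes of struct virtio_net_config are copied in
 * virtio_net_get_config()/virtio_net_set_config(). */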
14f9b664 90static VirtIOFeature feature_sizes[] = {
127833ee 91 {.flags = 1ULL << VIRTIO_NET_F_MAC,
ba550851 92 .end = virtio_endof(struct virtio_net_config, mac)},
127833ee 93 {.flags = 1ULL << VIRTIO_NET_F_STATUS,
ba550851 94 .end = virtio_endof(struct virtio_net_config, status)},
127833ee 95 {.flags = 1ULL << VIRTIO_NET_F_MQ,
ba550851 96 .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)},
127833ee 97 {.flags = 1ULL << VIRTIO_NET_F_MTU,
ba550851 98 .end = virtio_endof(struct virtio_net_config, mtu)},
9473939e 99 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
ba550851 100 .end = virtio_endof(struct virtio_net_config, duplex)},
14f9b664
JL
101 {}
102};
103
fed699f9 104static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
0c87e93e
JW
105{
106 VirtIONet *n = qemu_get_nic_opaque(nc);
107
fed699f9 108 return &n->vqs[nc->queue_index];
0c87e93e 109}
fed699f9
JW
110
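/* Virtqueues are laid out as RX/TX pairs (rx = 2 * pair, tx = 2 * pair + 1),
 * so halving a virtqueue index yields the VirtIONetQueue index in n->vqs[]. */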
111static int vq2q(int queue_index)
112{
113 return queue_index / 2;
114}
115
fbe78f4f
AL
116/* TODO
117 * - we could suppress RX interrupt if we were so inclined.
118 */
119
0f03eca6 120static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
fbe78f4f 121{
17a0ca55 122 VirtIONet *n = VIRTIO_NET(vdev);
fbe78f4f
AL
123 struct virtio_net_config netcfg;
124
1399c60d
RR
125 virtio_stw_p(vdev, &netcfg.status, n->status);
126 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
a93e599d 127 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
79674068 128 memcpy(netcfg.mac, n->mac, ETH_ALEN);
9473939e
JB
129 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
130 netcfg.duplex = n->net_conf.duplex;
14f9b664 131 memcpy(config, &netcfg, n->config_size);
fbe78f4f
AL
132}
133
0f03eca6
AL
134static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
135{
17a0ca55 136 VirtIONet *n = VIRTIO_NET(vdev);
14f9b664 137 struct virtio_net_config netcfg = {};
0f03eca6 138
14f9b664 139 memcpy(&netcfg, config, n->config_size);
0f03eca6 140
95129d6f
CH
141 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
142 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
c1943a3f 143 memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
79674068 144 memcpy(n->mac, netcfg.mac, ETH_ALEN);
b356f76d 145 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
0f03eca6
AL
146 }
147}
148
783e7706
MT
149static bool virtio_net_started(VirtIONet *n, uint8_t status)
150{
17a0ca55 151 VirtIODevice *vdev = VIRTIO_DEVICE(n);
783e7706 152 return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
17a0ca55 153 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
783e7706
MT
154}
155
b2c929f0
DDAG
156static void virtio_net_announce_notify(VirtIONet *net)
157{
158 VirtIODevice *vdev = VIRTIO_DEVICE(net);
159 trace_virtio_net_announce_notify();
160
161 net->status |= VIRTIO_NET_S_ANNOUNCE;
162 virtio_notify_config(vdev);
163}
164
f57fcf70
JW
165static void virtio_net_announce_timer(void *opaque)
166{
167 VirtIONet *n = opaque;
9d8c6a25 168 trace_virtio_net_announce_timer(n->announce_timer.round);
f57fcf70 169
9d8c6a25 170 n->announce_timer.round--;
b2c929f0
DDAG
171 virtio_net_announce_notify(n);
172}
173
174static void virtio_net_announce(NetClientState *nc)
175{
176 VirtIONet *n = qemu_get_nic_opaque(nc);
177 VirtIODevice *vdev = VIRTIO_DEVICE(n);
178
179 /*
180 * Make sure the virtio migration announcement timer isn't running.
181 * If it is, let it trigger the announcement so that we do not cause
182 * confusion.
183 */
184 if (n->announce_timer.round) {
185 return;
186 }
187
188 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
189 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
190 virtio_net_announce_notify(n);
191 }
f57fcf70
JW
192}
193
783e7706 194static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
afbaa7b4 195{
17a0ca55 196 VirtIODevice *vdev = VIRTIO_DEVICE(n);
b356f76d 197 NetClientState *nc = qemu_get_queue(n->nic);
fed699f9 198 int queues = n->multiqueue ? n->max_queues : 1;
b356f76d 199
ed8b4afe 200 if (!get_vhost_net(nc->peer)) {
afbaa7b4
MT
201 return;
202 }
fed699f9 203
8c1ac475
RK
204 if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
205 !!n->vhost_started) {
afbaa7b4
MT
206 return;
207 }
208 if (!n->vhost_started) {
086abc1c
MT
209 int r, i;
210
1bfa316c
GK
211 if (n->needs_vnet_hdr_swap) {
212 error_report("backend does not support %s vnet headers; "
213 "falling back on userspace virtio",
214 virtio_is_big_endian(vdev) ? "BE" : "LE");
215 return;
216 }
217
086abc1c
MT
218 /* Any packets outstanding? Purge them to avoid touching rings
219 * when vhost is running.
220 */
221 for (i = 0; i < queues; i++) {
222 NetClientState *qnc = qemu_get_subqueue(n->nic, i);
223
224 /* Purge both directions: TX and RX. */
225 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
226 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
227 }
228
a93e599d
MC
229 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
230 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
231 if (r < 0) {
232 error_report("%uBytes MTU not supported by the backend",
233 n->net_conf.mtu);
234
235 return;
236 }
237 }
238
1830b80f 239 n->vhost_started = 1;
17a0ca55 240 r = vhost_net_start(vdev, n->nic->ncs, queues);
afbaa7b4 241 if (r < 0) {
e7b43f7e
SH
242 error_report("unable to start vhost net: %d: "
243 "falling back on userspace virtio", -r);
1830b80f 244 n->vhost_started = 0;
afbaa7b4
MT
245 }
246 } else {
17a0ca55 247 vhost_net_stop(vdev, n->nic->ncs, queues);
afbaa7b4
MT
248 n->vhost_started = 0;
249 }
250}
251
1bfa316c
GK
252static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
253 NetClientState *peer,
254 bool enable)
255{
256 if (virtio_is_big_endian(vdev)) {
257 return qemu_set_vnet_be(peer, enable);
258 } else {
259 return qemu_set_vnet_le(peer, enable);
260 }
261}
262
263static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
264 int queues, bool enable)
265{
266 int i;
267
268 for (i = 0; i < queues; i++) {
269 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
270 enable) {
271 while (--i >= 0) {
272 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
273 }
274
275 return true;
276 }
277 }
278
279 return false;
280}
281
282static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
283{
284 VirtIODevice *vdev = VIRTIO_DEVICE(n);
285 int queues = n->multiqueue ? n->max_queues : 1;
286
287 if (virtio_net_started(n, status)) {
288 /* Before using the device, we tell the network backend about the
289 * endianness to use when parsing vnet headers. If the backend
290 * can't do it, we fall back to fixing the headers in the core
291 * virtio-net code.
292 */
293 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
294 queues, true);
295 } else if (virtio_net_started(n, vdev->status)) {
296 /* After using the device, we need to reset the network backend to
297 * the default (guest native endianness), otherwise the guest may
298 * lose network connectivity if it is rebooted into a different
299 * endianness.
300 */
301 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
302 }
303}
304
283e2c2a
YB
305static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
306{
307 unsigned int dropped = virtqueue_drop_all(vq);
308 if (dropped) {
309 virtio_notify(vdev, vq);
310 }
311}
312
783e7706
MT
313static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
314{
17a0ca55 315 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9
JW
316 VirtIONetQueue *q;
317 int i;
318 uint8_t queue_status;
783e7706 319
1bfa316c 320 virtio_net_vnet_endian_status(n, status);
783e7706
MT
321 virtio_net_vhost_status(n, status);
322
fed699f9 323 for (i = 0; i < n->max_queues; i++) {
38705bb5
FZ
324 NetClientState *ncs = qemu_get_subqueue(n->nic, i);
325 bool queue_started;
fed699f9 326 q = &n->vqs[i];
783e7706 327
fed699f9
JW
328 if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
329 queue_status = 0;
783e7706 330 } else {
fed699f9 331 queue_status = status;
783e7706 332 }
38705bb5
FZ
333 queue_started =
334 virtio_net_started(n, queue_status) && !n->vhost_started;
335
336 if (queue_started) {
337 qemu_flush_queued_packets(ncs);
338 }
fed699f9
JW
339
340 if (!q->tx_waiting) {
341 continue;
342 }
343
38705bb5 344 if (queue_started) {
fed699f9 345 if (q->tx_timer) {
bc72ad67
AB
346 timer_mod(q->tx_timer,
347 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
fed699f9
JW
348 } else {
349 qemu_bh_schedule(q->tx_bh);
350 }
783e7706 351 } else {
fed699f9 352 if (q->tx_timer) {
bc72ad67 353 timer_del(q->tx_timer);
fed699f9
JW
354 } else {
355 qemu_bh_cancel(q->tx_bh);
356 }
283e2c2a 357 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
70e53e6e
JW
358 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
359 vdev->vm_running) {
283e2c2a
YB
360 /* if tx is waiting, we likely have some packets in the tx queue
361 * and notification disabled */
362 q->tx_waiting = 0;
363 virtio_queue_set_notification(q->tx_vq, 1);
364 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
365 }
783e7706
MT
366 }
367 }
368}
369
4e68f7a0 370static void virtio_net_set_link_status(NetClientState *nc)
554c97dd 371{
cc1f0f45 372 VirtIONet *n = qemu_get_nic_opaque(nc);
17a0ca55 373 VirtIODevice *vdev = VIRTIO_DEVICE(n);
554c97dd
AL
374 uint16_t old_status = n->status;
375
eb6b6c12 376 if (nc->link_down)
554c97dd
AL
377 n->status &= ~VIRTIO_NET_S_LINK_UP;
378 else
379 n->status |= VIRTIO_NET_S_LINK_UP;
380
381 if (n->status != old_status)
17a0ca55 382 virtio_notify_config(vdev);
afbaa7b4 383
17a0ca55 384 virtio_net_set_status(vdev, vdev->status);
554c97dd
AL
385}
386
b1be4280
AK
387static void rxfilter_notify(NetClientState *nc)
388{
b1be4280
AK
389 VirtIONet *n = qemu_get_nic_opaque(nc);
390
391 if (nc->rxfilter_notify_enabled) {
96e35046 392 gchar *path = object_get_canonical_path(OBJECT(n->qdev));
06150279 393 qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
3ab72385 394 n->netclient_name, path);
96e35046 395 g_free(path);
b1be4280
AK
396
397 /* disable event notification to avoid event flooding */
398 nc->rxfilter_notify_enabled = 0;
399 }
400}
401
f7bc8ef8
AK
402static intList *get_vlan_table(VirtIONet *n)
403{
404 intList *list, *entry;
405 int i, j;
406
407 list = NULL;
408 for (i = 0; i < MAX_VLAN >> 5; i++) {
409 for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
410 if (n->vlans[i] & (1U << j)) {
411 entry = g_malloc0(sizeof(*entry));
412 entry->value = (i << 5) + j;
413 entry->next = list;
414 list = entry;
415 }
416 }
417 }
418
419 return list;
420}
421
b1be4280
AK
422static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
423{
424 VirtIONet *n = qemu_get_nic_opaque(nc);
f7bc8ef8 425 VirtIODevice *vdev = VIRTIO_DEVICE(n);
b1be4280
AK
426 RxFilterInfo *info;
427 strList *str_list, *entry;
f7bc8ef8 428 int i;
b1be4280
AK
429
430 info = g_malloc0(sizeof(*info));
431 info->name = g_strdup(nc->name);
432 info->promiscuous = n->promisc;
433
434 if (n->nouni) {
435 info->unicast = RX_STATE_NONE;
436 } else if (n->alluni) {
437 info->unicast = RX_STATE_ALL;
438 } else {
439 info->unicast = RX_STATE_NORMAL;
440 }
441
442 if (n->nomulti) {
443 info->multicast = RX_STATE_NONE;
444 } else if (n->allmulti) {
445 info->multicast = RX_STATE_ALL;
446 } else {
447 info->multicast = RX_STATE_NORMAL;
448 }
449
450 info->broadcast_allowed = n->nobcast;
451 info->multicast_overflow = n->mac_table.multi_overflow;
452 info->unicast_overflow = n->mac_table.uni_overflow;
453
b0575ba4 454 info->main_mac = qemu_mac_strdup_printf(n->mac);
b1be4280
AK
455
456 str_list = NULL;
457 for (i = 0; i < n->mac_table.first_multi; i++) {
458 entry = g_malloc0(sizeof(*entry));
b0575ba4 459 entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
b1be4280
AK
460 entry->next = str_list;
461 str_list = entry;
462 }
463 info->unicast_table = str_list;
464
465 str_list = NULL;
466 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
467 entry = g_malloc0(sizeof(*entry));
b0575ba4 468 entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
b1be4280
AK
469 entry->next = str_list;
470 str_list = entry;
471 }
472 info->multicast_table = str_list;
f7bc8ef8 473 info->vlan_table = get_vlan_table(n);
b1be4280 474
95129d6f 475 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
f7bc8ef8
AK
476 info->vlan = RX_STATE_ALL;
477 } else if (!info->vlan_table) {
478 info->vlan = RX_STATE_NONE;
479 } else {
480 info->vlan = RX_STATE_NORMAL;
b1be4280 481 }
b1be4280
AK
482
483 /* enable event notification after query */
484 nc->rxfilter_notify_enabled = 1;
485
486 return info;
487}
488
002437cd
AL
489static void virtio_net_reset(VirtIODevice *vdev)
490{
17a0ca55 491 VirtIONet *n = VIRTIO_NET(vdev);
94b52958 492 int i;
002437cd
AL
493
494 /* Reset back to compatibility mode */
495 n->promisc = 1;
496 n->allmulti = 0;
015cb166
AW
497 n->alluni = 0;
498 n->nomulti = 0;
499 n->nouni = 0;
500 n->nobcast = 0;
fed699f9
JW
501 /* multiqueue is disabled by default */
502 n->curr_queues = 1;
9d8c6a25
DDAG
503 timer_del(n->announce_timer.tm);
504 n->announce_timer.round = 0;
f57fcf70 505 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
b6503ed9 506
f21c0ed9 507 /* Flush any MAC and VLAN filter table state */
b6503ed9 508 n->mac_table.in_use = 0;
2d9aba39 509 n->mac_table.first_multi = 0;
8fd2a2f1
AW
510 n->mac_table.multi_overflow = 0;
511 n->mac_table.uni_overflow = 0;
b6503ed9 512 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
41dc8a67 513 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
702d66a8 514 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
f21c0ed9 515 memset(n->vlans, 0, MAX_VLAN >> 3);
94b52958
GK
516
517 /* Flush any async TX */
518 for (i = 0; i < n->max_queues; i++) {
519 NetClientState *nc = qemu_get_subqueue(n->nic, i);
520
521 if (nc->peer) {
522 qemu_flush_or_purge_queued_packets(nc->peer, true);
523 assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
524 }
525 }
002437cd
AL
526}
527
6e371ab8 528static void peer_test_vnet_hdr(VirtIONet *n)
3a330134 529{
b356f76d
JW
530 NetClientState *nc = qemu_get_queue(n->nic);
531 if (!nc->peer) {
6e371ab8 532 return;
b356f76d 533 }
3a330134 534
d6085e3a 535 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
6e371ab8 536}
3a330134 537
6e371ab8
MT
538static int peer_has_vnet_hdr(VirtIONet *n)
539{
3a330134
MM
540 return n->has_vnet_hdr;
541}
542
0ce0e8f4
MM
543static int peer_has_ufo(VirtIONet *n)
544{
545 if (!peer_has_vnet_hdr(n))
546 return 0;
547
d6085e3a 548 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
0ce0e8f4
MM
549
550 return n->has_ufo;
551}
552
bb9d17f8
CH
553static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
554 int version_1)
ff3a8066 555{
fed699f9
JW
556 int i;
557 NetClientState *nc;
558
ff3a8066
MT
559 n->mergeable_rx_bufs = mergeable_rx_bufs;
560
bb9d17f8
CH
561 if (version_1) {
562 n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
563 } else {
564 n->guest_hdr_len = n->mergeable_rx_bufs ?
565 sizeof(struct virtio_net_hdr_mrg_rxbuf) :
566 sizeof(struct virtio_net_hdr);
567 }
ff3a8066 568
fed699f9
JW
569 for (i = 0; i < n->max_queues; i++) {
570 nc = qemu_get_subqueue(n->nic, i);
571
572 if (peer_has_vnet_hdr(n) &&
d6085e3a
SH
573 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
574 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
fed699f9
JW
575 n->host_hdr_len = n->guest_hdr_len;
576 }
ff3a8066
MT
577 }
578}
579
2eef278b
MT
580static int virtio_net_max_tx_queue_size(VirtIONet *n)
581{
582 NetClientState *peer = n->nic_conf.peers.ncs[0];
583
584 /*
585 * Backends other than vhost-user don't support max queue size.
586 */
587 if (!peer) {
588 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
589 }
590
591 if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
592 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
593 }
594
595 return VIRTQUEUE_MAX_SIZE;
596}
597
fed699f9
JW
598static int peer_attach(VirtIONet *n, int index)
599{
600 NetClientState *nc = qemu_get_subqueue(n->nic, index);
601
602 if (!nc->peer) {
603 return 0;
604 }
605
f394b2e2 606 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
7263a0ad
CO
607 vhost_set_vring_enable(nc->peer, 1);
608 }
609
f394b2e2 610 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
fed699f9
JW
611 return 0;
612 }
613
1074b879
JW
614 if (n->max_queues == 1) {
615 return 0;
616 }
617
fed699f9
JW
618 return tap_enable(nc->peer);
619}
620
621static int peer_detach(VirtIONet *n, int index)
622{
623 NetClientState *nc = qemu_get_subqueue(n->nic, index);
624
625 if (!nc->peer) {
626 return 0;
627 }
628
f394b2e2 629 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
7263a0ad
CO
630 vhost_set_vring_enable(nc->peer, 0);
631 }
632
f394b2e2 633 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
fed699f9
JW
634 return 0;
635 }
636
637 return tap_disable(nc->peer);
638}
639
640static void virtio_net_set_queues(VirtIONet *n)
641{
642 int i;
ddfa83ea 643 int r;
fed699f9 644
68b5f314
YB
645 if (n->nic->peer_deleted) {
646 return;
647 }
648
fed699f9
JW
649 for (i = 0; i < n->max_queues; i++) {
650 if (i < n->curr_queues) {
ddfa83ea
JS
651 r = peer_attach(n, i);
652 assert(!r);
fed699f9 653 } else {
ddfa83ea
JS
654 r = peer_detach(n, i);
655 assert(!r);
fed699f9
JW
656 }
657 }
658}
659
ec57db16 660static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
fed699f9 661
9d5b731d
JW
662static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
663 Error **errp)
fbe78f4f 664{
17a0ca55 665 VirtIONet *n = VIRTIO_NET(vdev);
b356f76d 666 NetClientState *nc = qemu_get_queue(n->nic);
fbe78f4f 667
da3e8a23
SZ
668 /* First, sync all the features that virtio-net could possibly support */
669 features |= n->host_features;
670
0cd09c3a 671 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
c9f79a3f 672
6e371ab8 673 if (!peer_has_vnet_hdr(n)) {
0cd09c3a
CH
674 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
675 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
676 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
677 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
8172539d 678
0cd09c3a
CH
679 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
680 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
681 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
682 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
8172539d 683 }
3a330134 684
8172539d 685 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
0cd09c3a
CH
686 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
687 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
3a330134
MM
688 }
689
ed8b4afe 690 if (!get_vhost_net(nc->peer)) {
9bc6304c
MT
691 return features;
692 }
2974e916 693
75ebec11
MC
694 features = vhost_net_get_features(get_vhost_net(nc->peer), features);
695 vdev->backend_features = features;
696
697 if (n->mtu_bypass_backend &&
698 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
699 features |= (1ULL << VIRTIO_NET_F_MTU);
700 }
701
702 return features;
fbe78f4f
AL
703}
704
019a3edb 705static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
8eca6b1b 706{
019a3edb 707 uint64_t features = 0;
8eca6b1b
AL
708
709 /* Linux kernel 2.6.25. It understood MAC (as everyone must),
710 * but also these: */
0cd09c3a
CH
711 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
712 virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
713 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
714 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
715 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
8eca6b1b 716
8172539d 717 return features;
8eca6b1b
AL
718}
719
644c9858
DF
720static void virtio_net_apply_guest_offloads(VirtIONet *n)
721{
ad37bb3b 722 qemu_set_offload(qemu_get_queue(n->nic)->peer,
644c9858
DF
723 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
724 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
725 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
726 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
727 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
728}
729
730static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
731{
732 static const uint64_t guest_offloads_mask =
733 (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
734 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
735 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
736 (1ULL << VIRTIO_NET_F_GUEST_ECN) |
737 (1ULL << VIRTIO_NET_F_GUEST_UFO);
738
739 return guest_offloads_mask & features;
740}
741
742static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
743{
744 VirtIODevice *vdev = VIRTIO_DEVICE(n);
745 return virtio_net_guest_offloads_by_features(vdev->guest_features);
746}
747
d5aaa1b0 748static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
fbe78f4f 749{
17a0ca55 750 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9
JW
751 int i;
752
75ebec11
MC
753 if (n->mtu_bypass_backend &&
754 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
755 features &= ~(1ULL << VIRTIO_NET_F_MTU);
756 }
757
ef546f12 758 virtio_net_set_multiqueue(n,
95129d6f 759 virtio_has_feature(features, VIRTIO_NET_F_MQ));
fbe78f4f 760
ef546f12 761 virtio_net_set_mrg_rx_bufs(n,
95129d6f
CH
762 virtio_has_feature(features,
763 VIRTIO_NET_F_MRG_RXBUF),
764 virtio_has_feature(features,
765 VIRTIO_F_VERSION_1));
f5436dd9 766
2974e916
YB
767 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
768 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
769 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
770 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
771
f5436dd9 772 if (n->has_vnet_hdr) {
644c9858
DF
773 n->curr_guest_offloads =
774 virtio_net_guest_offloads_by_features(features);
775 virtio_net_apply_guest_offloads(n);
f5436dd9 776 }
fed699f9
JW
777
778 for (i = 0; i < n->max_queues; i++) {
779 NetClientState *nc = qemu_get_subqueue(n->nic, i);
780
ed8b4afe 781 if (!get_vhost_net(nc->peer)) {
fed699f9
JW
782 continue;
783 }
ed8b4afe 784 vhost_net_ack_features(get_vhost_net(nc->peer), features);
dc14a397 785 }
0b1eaa88 786
95129d6f 787 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
0b1eaa88
SF
788 memset(n->vlans, 0, MAX_VLAN >> 3);
789 } else {
790 memset(n->vlans, 0xff, MAX_VLAN >> 3);
791 }
fbe78f4f
AL
792}
793
002437cd 794static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
921ac5d0 795 struct iovec *iov, unsigned int iov_cnt)
002437cd
AL
796{
797 uint8_t on;
921ac5d0 798 size_t s;
b1be4280 799 NetClientState *nc = qemu_get_queue(n->nic);
002437cd 800
921ac5d0
MT
801 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
802 if (s != sizeof(on)) {
803 return VIRTIO_NET_ERR;
002437cd
AL
804 }
805
dd23454b 806 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
002437cd 807 n->promisc = on;
dd23454b 808 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
002437cd 809 n->allmulti = on;
dd23454b 810 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
015cb166 811 n->alluni = on;
dd23454b 812 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
015cb166 813 n->nomulti = on;
dd23454b 814 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
015cb166 815 n->nouni = on;
dd23454b 816 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
015cb166 817 n->nobcast = on;
921ac5d0 818 } else {
002437cd 819 return VIRTIO_NET_ERR;
921ac5d0 820 }
002437cd 821
b1be4280
AK
822 rxfilter_notify(nc);
823
002437cd
AL
824 return VIRTIO_NET_OK;
825}
826
644c9858
DF
827static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
828 struct iovec *iov, unsigned int iov_cnt)
829{
830 VirtIODevice *vdev = VIRTIO_DEVICE(n);
831 uint64_t offloads;
832 size_t s;
833
95129d6f 834 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
644c9858
DF
835 return VIRTIO_NET_ERR;
836 }
837
838 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
839 if (s != sizeof(offloads)) {
840 return VIRTIO_NET_ERR;
841 }
842
843 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
844 uint64_t supported_offloads;
845
189ae6bb
JW
846 offloads = virtio_ldq_p(vdev, &offloads);
847
644c9858
DF
848 if (!n->has_vnet_hdr) {
849 return VIRTIO_NET_ERR;
850 }
851
2974e916
YB
852 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
853 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
854 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
855 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
856 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
857
644c9858
DF
858 supported_offloads = virtio_net_supported_guest_offloads(n);
859 if (offloads & ~supported_offloads) {
860 return VIRTIO_NET_ERR;
861 }
862
863 n->curr_guest_offloads = offloads;
864 virtio_net_apply_guest_offloads(n);
865
866 return VIRTIO_NET_OK;
867 } else {
868 return VIRTIO_NET_ERR;
869 }
870}
871
b6503ed9 872static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
921ac5d0 873 struct iovec *iov, unsigned int iov_cnt)
b6503ed9 874{
1399c60d 875 VirtIODevice *vdev = VIRTIO_DEVICE(n);
b6503ed9 876 struct virtio_net_ctrl_mac mac_data;
921ac5d0 877 size_t s;
b1be4280 878 NetClientState *nc = qemu_get_queue(n->nic);
b6503ed9 879
c1943a3f
AK
880 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
881 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
882 return VIRTIO_NET_ERR;
883 }
884 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
885 assert(s == sizeof(n->mac));
b356f76d 886 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
b1be4280
AK
887 rxfilter_notify(nc);
888
c1943a3f
AK
889 return VIRTIO_NET_OK;
890 }
891
921ac5d0 892 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
b6503ed9 893 return VIRTIO_NET_ERR;
921ac5d0 894 }
b6503ed9 895
cae2e556
AK
896 int in_use = 0;
897 int first_multi = 0;
898 uint8_t uni_overflow = 0;
899 uint8_t multi_overflow = 0;
900 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
b6503ed9 901
921ac5d0
MT
902 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
903 sizeof(mac_data.entries));
1399c60d 904 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
921ac5d0 905 if (s != sizeof(mac_data.entries)) {
b1be4280 906 goto error;
921ac5d0
MT
907 }
908 iov_discard_front(&iov, &iov_cnt, s);
b6503ed9 909
921ac5d0 910 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
b1be4280 911 goto error;
921ac5d0 912 }
b6503ed9
AL
913
914 if (mac_data.entries <= MAC_TABLE_ENTRIES) {
cae2e556 915 s = iov_to_buf(iov, iov_cnt, 0, macs,
921ac5d0
MT
916 mac_data.entries * ETH_ALEN);
917 if (s != mac_data.entries * ETH_ALEN) {
b1be4280 918 goto error;
921ac5d0 919 }
cae2e556 920 in_use += mac_data.entries;
b6503ed9 921 } else {
cae2e556 922 uni_overflow = 1;
b6503ed9
AL
923 }
924
921ac5d0
MT
925 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
926
cae2e556 927 first_multi = in_use;
2d9aba39 928
921ac5d0
MT
929 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
930 sizeof(mac_data.entries));
1399c60d 931 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
921ac5d0 932 if (s != sizeof(mac_data.entries)) {
b1be4280 933 goto error;
921ac5d0
MT
934 }
935
936 iov_discard_front(&iov, &iov_cnt, s);
b6503ed9 937
921ac5d0 938 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
b1be4280 939 goto error;
921ac5d0 940 }
b6503ed9 941
edc24385 942 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
cae2e556 943 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
921ac5d0
MT
944 mac_data.entries * ETH_ALEN);
945 if (s != mac_data.entries * ETH_ALEN) {
b1be4280 946 goto error;
8fd2a2f1 947 }
cae2e556 948 in_use += mac_data.entries;
921ac5d0 949 } else {
cae2e556 950 multi_overflow = 1;
b6503ed9
AL
951 }
952
cae2e556
AK
953 n->mac_table.in_use = in_use;
954 n->mac_table.first_multi = first_multi;
955 n->mac_table.uni_overflow = uni_overflow;
956 n->mac_table.multi_overflow = multi_overflow;
957 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
958 g_free(macs);
b1be4280
AK
959 rxfilter_notify(nc);
960
b6503ed9 961 return VIRTIO_NET_OK;
b1be4280
AK
962
963error:
cae2e556 964 g_free(macs);
b1be4280 965 return VIRTIO_NET_ERR;
b6503ed9
AL
966}
967
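/* The VLAN filter n->vlans is a MAX_VLAN-bit bitmap stored as 32-bit words:
 * vid >> 5 selects the word and vid & 0x1f the bit within it. */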
f21c0ed9 968static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
921ac5d0 969 struct iovec *iov, unsigned int iov_cnt)
f21c0ed9 970{
1399c60d 971 VirtIODevice *vdev = VIRTIO_DEVICE(n);
f21c0ed9 972 uint16_t vid;
921ac5d0 973 size_t s;
b1be4280 974 NetClientState *nc = qemu_get_queue(n->nic);
f21c0ed9 975
921ac5d0 976 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1399c60d 977 vid = virtio_lduw_p(vdev, &vid);
921ac5d0 978 if (s != sizeof(vid)) {
f21c0ed9
AL
979 return VIRTIO_NET_ERR;
980 }
981
f21c0ed9
AL
982 if (vid >= MAX_VLAN)
983 return VIRTIO_NET_ERR;
984
985 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
986 n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
987 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
988 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
989 else
990 return VIRTIO_NET_ERR;
991
b1be4280
AK
992 rxfilter_notify(nc);
993
f21c0ed9
AL
994 return VIRTIO_NET_OK;
995}
996
f57fcf70
JW
997static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
998 struct iovec *iov, unsigned int iov_cnt)
999{
9d8c6a25 1000 trace_virtio_net_handle_announce(n->announce_timer.round);
f57fcf70
JW
1001 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1002 n->status & VIRTIO_NET_S_ANNOUNCE) {
1003 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
9d8c6a25
DDAG
1004 if (n->announce_timer.round) {
1005 qemu_announce_timer_step(&n->announce_timer);
f57fcf70
JW
1006 }
1007 return VIRTIO_NET_OK;
1008 } else {
1009 return VIRTIO_NET_ERR;
1010 }
1011}
1012
fed699f9 1013static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
f8f7c533 1014 struct iovec *iov, unsigned int iov_cnt)
fed699f9 1015{
17a0ca55 1016 VirtIODevice *vdev = VIRTIO_DEVICE(n);
f8f7c533
JW
1017 struct virtio_net_ctrl_mq mq;
1018 size_t s;
1019 uint16_t queues;
fed699f9 1020
f8f7c533
JW
1021 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1022 if (s != sizeof(mq)) {
fed699f9
JW
1023 return VIRTIO_NET_ERR;
1024 }
1025
1026 if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1027 return VIRTIO_NET_ERR;
1028 }
1029
1399c60d 1030 queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
fed699f9 1031
f8f7c533
JW
1032 if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1033 queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1034 queues > n->max_queues ||
fed699f9
JW
1035 !n->multiqueue) {
1036 return VIRTIO_NET_ERR;
1037 }
1038
f8f7c533 1039 n->curr_queues = queues;
fed699f9
JW
1040 /* stop the backend before changing the number of queues to avoid handling a
1041 * disabled queue */
17a0ca55 1042 virtio_net_set_status(vdev, vdev->status);
fed699f9
JW
1043 virtio_net_set_queues(n);
1044
1045 return VIRTIO_NET_OK;
1046}
ba7eadb5 1047
3d11d36c
AL
1048static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1049{
17a0ca55 1050 VirtIONet *n = VIRTIO_NET(vdev);
3d11d36c
AL
1051 struct virtio_net_ctrl_hdr ctrl;
1052 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
51b19ebe 1053 VirtQueueElement *elem;
921ac5d0 1054 size_t s;
771b6ed3 1055 struct iovec *iov, *iov2;
921ac5d0 1056 unsigned int iov_cnt;
3d11d36c 1057
51b19ebe
PB
1058 for (;;) {
1059 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1060 if (!elem) {
1061 break;
1062 }
1063 if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1064 iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
ba7eadb5
GK
1065 virtio_error(vdev, "virtio-net ctrl missing headers");
1066 virtqueue_detach_element(vq, elem, 0);
1067 g_free(elem);
1068 break;
3d11d36c
AL
1069 }
1070
51b19ebe
PB
1071 iov_cnt = elem->out_num;
1072 iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
921ac5d0
MT
1073 s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1074 iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1075 if (s != sizeof(ctrl)) {
1076 status = VIRTIO_NET_ERR;
dd23454b 1077 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
921ac5d0
MT
1078 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1079 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1080 status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1081 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1082 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
f57fcf70
JW
1083 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1084 status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
fed699f9 1085 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
f8f7c533 1086 status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
644c9858
DF
1087 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1088 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
3d11d36c
AL
1089 }
1090
51b19ebe 1091 s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
921ac5d0 1092 assert(s == sizeof(status));
3d11d36c 1093
51b19ebe 1094 virtqueue_push(vq, elem, sizeof(status));
3d11d36c 1095 virtio_notify(vdev, vq);
771b6ed3 1096 g_free(iov2);
51b19ebe 1097 g_free(elem);
3d11d36c
AL
1098 }
1099}
1100
fbe78f4f
AL
1101/* RX */
1102
1103static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1104{
17a0ca55 1105 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 1106 int queue_index = vq2q(virtio_get_queue_index(vq));
8aeff62d 1107
fed699f9 1108 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
fbe78f4f
AL
1109}
1110
4e68f7a0 1111static int virtio_net_can_receive(NetClientState *nc)
fbe78f4f 1112{
cc1f0f45 1113 VirtIONet *n = qemu_get_nic_opaque(nc);
17a0ca55 1114 VirtIODevice *vdev = VIRTIO_DEVICE(n);
fed699f9 1115 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
0c87e93e 1116
17a0ca55 1117 if (!vdev->vm_running) {
95477323
MT
1118 return 0;
1119 }
cdd5cc12 1120
fed699f9
JW
1121 if (nc->queue_index >= n->curr_queues) {
1122 return 0;
1123 }
1124
0c87e93e 1125 if (!virtio_queue_ready(q->rx_vq) ||
17a0ca55 1126 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
fbe78f4f 1127 return 0;
0c87e93e 1128 }
fbe78f4f 1129
cdd5cc12
MM
1130 return 1;
1131}
1132
0c87e93e 1133static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
cdd5cc12 1134{
0c87e93e
JW
1135 VirtIONet *n = q->n;
1136 if (virtio_queue_empty(q->rx_vq) ||
fbe78f4f 1137 (n->mergeable_rx_bufs &&
0c87e93e
JW
1138 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1139 virtio_queue_set_notification(q->rx_vq, 1);
06b12970
TL
1140
1141 /* To avoid a race condition where the guest has made some buffers
1142 * available after the above check but before notification was
1143 * enabled, check for available buffers again.
1144 */
0c87e93e 1145 if (virtio_queue_empty(q->rx_vq) ||
06b12970 1146 (n->mergeable_rx_bufs &&
0c87e93e 1147 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
06b12970 1148 return 0;
0c87e93e 1149 }
fbe78f4f
AL
1150 }
1151
0c87e93e 1152 virtio_queue_set_notification(q->rx_vq, 0);
fbe78f4f
AL
1153 return 1;
1154}
1155
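/* Used when the backend could not be told the vnet header endianness
 * (n->needs_vnet_hdr_swap): byte-swap the multi-byte vnet header fields
 * between host and guest endianness. */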
1399c60d 1156static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
032a74a1 1157{
1399c60d
RR
1158 virtio_tswap16s(vdev, &hdr->hdr_len);
1159 virtio_tswap16s(vdev, &hdr->gso_size);
1160 virtio_tswap16s(vdev, &hdr->csum_start);
1161 virtio_tswap16s(vdev, &hdr->csum_offset);
032a74a1
CLG
1162}
1163
1d41b0c1
AL
1164/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1165 * it never finds out that the packets don't have valid checksums. This
1166 * causes dhclient to get upset. Fedora's carried a patch for ages to
1167 * fix this with Xen but it hasn't appeared in an upstream release of
1168 * dhclient yet.
1169 *
1170 * To avoid breaking existing guests, we catch udp packets and add
1171 * checksums. This is terrible but it's better than hacking the guest
1172 * kernels.
1173 *
1174 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1175 * we should provide a mechanism to disable it to avoid polluting the host
1176 * cache.
1177 */
1178static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
22cc84db 1179 uint8_t *buf, size_t size)
1d41b0c1
AL
1180{
1181 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1182 (size > 27 && size < 1500) && /* normal sized MTU */
1183 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1184 (buf[23] == 17) && /* ip.protocol == UDP */
1185 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
22cc84db 1186 net_checksum_calculate(buf, size);
1d41b0c1
AL
1187 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1188 }
1189}
1190
280598b7
MT
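/* Build the vnet header seen by the guest: if the backend provides a vnet
 * header (n->has_vnet_hdr), patch up broken-dhclient checksums and byte-swap
 * it when needed; otherwise synthesize a header with no offloads
 * (VIRTIO_NET_HDR_GSO_NONE). */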
1191static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1192 const void *buf, size_t size)
fbe78f4f 1193{
3a330134 1194 if (n->has_vnet_hdr) {
22cc84db
MT
1195 /* FIXME this cast is evil */
1196 void *wbuf = (void *)buf;
280598b7
MT
1197 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1198 size - n->host_hdr_len);
1bfa316c
GK
1199
1200 if (n->needs_vnet_hdr_swap) {
1201 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1202 }
280598b7 1203 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
22cc84db
MT
1204 } else {
1205 struct virtio_net_hdr hdr = {
1206 .flags = 0,
1207 .gso_type = VIRTIO_NET_HDR_GSO_NONE
1208 };
1209 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
3a330134 1210 }
fbe78f4f
AL
1211}
1212
3831ab20
AL
1213static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1214{
1215 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
f21c0ed9 1216 static const uint8_t vlan[] = {0x81, 0x00};
3831ab20 1217 uint8_t *ptr = (uint8_t *)buf;
b6503ed9 1218 int i;
3831ab20
AL
1219
1220 if (n->promisc)
1221 return 1;
1222
e043ebc6 1223 ptr += n->host_hdr_len;
3a330134 1224
f21c0ed9 1225 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
7542d3e7 1226 int vid = lduw_be_p(ptr + 14) & 0xfff;
f21c0ed9
AL
1227 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1228 return 0;
1229 }
1230
bbe2f399
AW
1231 if (ptr[0] & 1) { // multicast
1232 if (!memcmp(ptr, bcast, sizeof(bcast))) {
015cb166
AW
1233 return !n->nobcast;
1234 } else if (n->nomulti) {
1235 return 0;
8fd2a2f1 1236 } else if (n->allmulti || n->mac_table.multi_overflow) {
bbe2f399
AW
1237 return 1;
1238 }
2d9aba39
AW
1239
1240 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1241 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1242 return 1;
1243 }
1244 }
bbe2f399 1245 } else { // unicast
015cb166
AW
1246 if (n->nouni) {
1247 return 0;
1248 } else if (n->alluni || n->mac_table.uni_overflow) {
8fd2a2f1
AW
1249 return 1;
1250 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
bbe2f399
AW
1251 return 1;
1252 }
3831ab20 1253
2d9aba39
AW
1254 for (i = 0; i < n->mac_table.first_multi; i++) {
1255 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1256 return 1;
1257 }
1258 }
b6503ed9
AL
1259 }
1260
3831ab20
AL
1261 return 0;
1262}
1263
97cd965c
PB
1264static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1265 size_t size)
fbe78f4f 1266{
cc1f0f45 1267 VirtIONet *n = qemu_get_nic_opaque(nc);
fed699f9 1268 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
17a0ca55 1269 VirtIODevice *vdev = VIRTIO_DEVICE(n);
63c58728
MT
1270 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1271 struct virtio_net_hdr_mrg_rxbuf mhdr;
1272 unsigned mhdr_cnt = 0;
22cc84db 1273 size_t offset, i, guest_offset;
fbe78f4f 1274
fed699f9 1275 if (!virtio_net_can_receive(nc)) {
cdd5cc12 1276 return -1;
b356f76d 1277 }
cdd5cc12 1278
940cda94 1279 /* hdr_len refers to the header we supply to the guest */
0c87e93e 1280 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
8aeff62d 1281 return 0;
0c87e93e 1282 }
fbe78f4f 1283
3831ab20 1284 if (!receive_filter(n, buf, size))
4f1c942b 1285 return size;
3831ab20 1286
fbe78f4f
AL
1287 offset = i = 0;
1288
1289 while (offset < size) {
51b19ebe 1290 VirtQueueElement *elem;
fbe78f4f 1291 int len, total;
51b19ebe 1292 const struct iovec *sg;
fbe78f4f 1293
22c253d9 1294 total = 0;
fbe78f4f 1295
51b19ebe
PB
1296 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1297 if (!elem) {
ba10b9c0
GK
1298 if (i) {
1299 virtio_error(vdev, "virtio-net unexpected empty queue: "
1300 "i %zd mergeable %d offset %zd, size %zd, "
1301 "guest hdr len %zd, host hdr len %zd "
1302 "guest features 0x%" PRIx64,
1303 i, n->mergeable_rx_bufs, offset, size,
1304 n->guest_hdr_len, n->host_hdr_len,
1305 vdev->guest_features);
1306 }
1307 return -1;
fbe78f4f
AL
1308 }
1309
51b19ebe 1310 if (elem->in_num < 1) {
ba10b9c0
GK
1311 virtio_error(vdev,
1312 "virtio-net receive queue contains no in buffers");
1313 virtqueue_detach_element(q->rx_vq, elem, 0);
1314 g_free(elem);
1315 return -1;
fbe78f4f
AL
1316 }
1317
51b19ebe 1318 sg = elem->in_sg;
fbe78f4f 1319 if (i == 0) {
c8d28e7e 1320 assert(offset == 0);
63c58728
MT
1321 if (n->mergeable_rx_bufs) {
1322 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
51b19ebe 1323 sg, elem->in_num,
63c58728
MT
1324 offsetof(typeof(mhdr), num_buffers),
1325 sizeof(mhdr.num_buffers));
1326 }
fbe78f4f 1327
51b19ebe 1328 receive_header(n, sg, elem->in_num, buf, size);
c8d28e7e 1329 offset = n->host_hdr_len;
e35e23f6 1330 total += n->guest_hdr_len;
22cc84db
MT
1331 guest_offset = n->guest_hdr_len;
1332 } else {
1333 guest_offset = 0;
fbe78f4f
AL
1334 }
1335
1336 /* copy in packet. ugh */
51b19ebe 1337 len = iov_from_buf(sg, elem->in_num, guest_offset,
dcf6f5e1 1338 buf + offset, size - offset);
fbe78f4f 1339 total += len;
279a4253
MT
1340 offset += len;
1341 /* If buffers can't be merged, at this point we
1342 * must have consumed the complete packet.
1343 * Otherwise, drop it. */
1344 if (!n->mergeable_rx_bufs && offset < size) {
27e57efe 1345 virtqueue_unpop(q->rx_vq, elem, total);
51b19ebe 1346 g_free(elem);
279a4253
MT
1347 return size;
1348 }
fbe78f4f
AL
1349
1350 /* signal other side */
51b19ebe
PB
1351 virtqueue_fill(q->rx_vq, elem, total, i++);
1352 g_free(elem);
fbe78f4f
AL
1353 }
1354
63c58728 1355 if (mhdr_cnt) {
1399c60d 1356 virtio_stw_p(vdev, &mhdr.num_buffers, i);
63c58728
MT
1357 iov_from_buf(mhdr_sg, mhdr_cnt,
1358 0,
1359 &mhdr.num_buffers, sizeof mhdr.num_buffers);
44b15bc5 1360 }
fbe78f4f 1361
0c87e93e 1362 virtqueue_flush(q->rx_vq, i);
17a0ca55 1363 virtio_notify(vdev, q->rx_vq);
4f1c942b
MM
1364
1365 return size;
fbe78f4f
AL
1366}
1367
2974e916 1368static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
97cd965c
PB
1369 size_t size)
1370{
1371 ssize_t r;
1372
1373 rcu_read_lock();
1374 r = virtio_net_receive_rcu(nc, buf, size);
1375 rcu_read_unlock();
1376 return r;
1377}
1378
2974e916
YB
1379static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1380 const uint8_t *buf,
1381 VirtioNetRscUnit *unit)
1382{
1383 uint16_t ip_hdrlen;
1384 struct ip_header *ip;
1385
1386 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1387 + sizeof(struct eth_header));
1388 unit->ip = (void *)ip;
1389 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1390 unit->ip_plen = &ip->ip_len;
1391 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1392 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1393 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1394}
1395
1396static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1397 const uint8_t *buf,
1398 VirtioNetRscUnit *unit)
1399{
1400 struct ip6_header *ip6;
1401
1402 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1403 + sizeof(struct eth_header));
1404 unit->ip = ip6;
1405 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1406 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\
1407 + sizeof(struct ip6_header));
1408 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1409
1410 /* There is a difference between the payload length in ipv4 and v6:
1411 the ip header is excluded in ipv6 */
1412 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1413}
1414
1415static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1416 VirtioNetRscSeg *seg)
1417{
1418 int ret;
1419 struct virtio_net_hdr *h;
1420
1421 h = (struct virtio_net_hdr *)seg->buf;
1422 h->flags = 0;
1423 h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1424
1425 if (seg->is_coalesced) {
1426 *virtio_net_rsc_ext_num_packets(h) = seg->packets;
1427 *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
1428 h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1429 if (chain->proto == ETH_P_IP) {
1430 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1431 } else {
1432 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1433 }
1434 }
1435
1436 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1437 QTAILQ_REMOVE(&chain->buffers, seg, next);
1438 g_free(seg->buf);
1439 g_free(seg);
1440
1441 return ret;
1442}
1443
1444static void virtio_net_rsc_purge(void *opq)
1445{
1446 VirtioNetRscSeg *seg, *rn;
1447 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1448
1449 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1450 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1451 chain->stat.purge_failed++;
1452 continue;
1453 }
1454 }
1455
1456 chain->stat.timer++;
1457 if (!QTAILQ_EMPTY(&chain->buffers)) {
1458 timer_mod(chain->drain_timer,
1459 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1460 }
1461}
1462
1463static void virtio_net_rsc_cleanup(VirtIONet *n)
1464{
1465 VirtioNetRscChain *chain, *rn_chain;
1466 VirtioNetRscSeg *seg, *rn_seg;
1467
1468 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1469 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1470 QTAILQ_REMOVE(&chain->buffers, seg, next);
1471 g_free(seg->buf);
1472 g_free(seg);
1473 }
1474
1475 timer_del(chain->drain_timer);
1476 timer_free(chain->drain_timer);
1477 QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1478 g_free(chain);
1479 }
1480}
1481
1482static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1483 NetClientState *nc,
1484 const uint8_t *buf, size_t size)
1485{
1486 uint16_t hdr_len;
1487 VirtioNetRscSeg *seg;
1488
1489 hdr_len = chain->n->guest_hdr_len;
1490 seg = g_malloc(sizeof(VirtioNetRscSeg));
1491 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1492 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1493 memcpy(seg->buf, buf, size);
1494 seg->size = size;
1495 seg->packets = 1;
1496 seg->dup_ack = 0;
1497 seg->is_coalesced = 0;
1498 seg->nc = nc;
1499
1500 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1501 chain->stat.cache++;
1502
1503 switch (chain->proto) {
1504 case ETH_P_IP:
1505 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1506 break;
1507 case ETH_P_IPV6:
1508 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1509 break;
1510 default:
1511 g_assert_not_reached();
1512 }
1513}
1514
1515static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1516 VirtioNetRscSeg *seg,
1517 const uint8_t *buf,
1518 struct tcp_header *n_tcp,
1519 struct tcp_header *o_tcp)
1520{
1521 uint32_t nack, oack;
1522 uint16_t nwin, owin;
1523
1524 nack = htonl(n_tcp->th_ack);
1525 nwin = htons(n_tcp->th_win);
1526 oack = htonl(o_tcp->th_ack);
1527 owin = htons(o_tcp->th_win);
1528
1529 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1530 chain->stat.ack_out_of_win++;
1531 return RSC_FINAL;
1532 } else if (nack == oack) {
1533 /* duplicated ack or window probe */
1534 if (nwin == owin) {
1535 /* duplicated ack; add to the dup ack count (the whql test allows up to 1) */
1536 chain->stat.dup_ack++;
1537 return RSC_FINAL;
1538 } else {
1539 /* Coalesce window update */
1540 o_tcp->th_win = n_tcp->th_win;
1541 chain->stat.win_update++;
1542 return RSC_COALESCE;
1543 }
1544 } else {
1545 /* pure ack, go to 'C', finalize*/
1546 chain->stat.pure_ack++;
1547 return RSC_FINAL;
1548 }
1549}
1550
1551static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1552 VirtioNetRscSeg *seg,
1553 const uint8_t *buf,
1554 VirtioNetRscUnit *n_unit)
1555{
1556 void *data;
1557 uint16_t o_ip_len;
1558 uint32_t nseq, oseq;
1559 VirtioNetRscUnit *o_unit;
1560
1561 o_unit = &seg->unit;
1562 o_ip_len = htons(*o_unit->ip_plen);
1563 nseq = htonl(n_unit->tcp->th_seq);
1564 oseq = htonl(o_unit->tcp->th_seq);
1565
1566 /* out of order or retransmitted. */
1567 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1568 chain->stat.data_out_of_win++;
1569 return RSC_FINAL;
1570 }
1571
1572 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1573 if (nseq == oseq) {
1574 if ((o_unit->payload == 0) && n_unit->payload) {
1575 /* From no payload to payload, the normal case; not a dup ack etc. */
1576 chain->stat.data_after_pure_ack++;
1577 goto coalesce;
1578 } else {
1579 return virtio_net_rsc_handle_ack(chain, seg, buf,
1580 n_unit->tcp, o_unit->tcp);
1581 }
1582 } else if ((nseq - oseq) != o_unit->payload) {
1583 /* Not a consistent packet, out of order */
1584 chain->stat.data_out_of_order++;
1585 return RSC_FINAL;
1586 } else {
1587coalesce:
1588 if ((o_ip_len + n_unit->payload) > chain->max_payload) {
1589 chain->stat.over_size++;
1590 return RSC_FINAL;
1591 }
1592
1593 /* The data is in order; since the payload length in v4/v6 is different,
1594 use the field value to update and record the new data len */
1595 o_unit->payload += n_unit->payload; /* update new data len */
1596
1597 /* update field in ip header */
1598 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
1599
1600 /* Carry over the latest flags (including 'PUSH'); the whql test guide
1601 says 'PUSH' can be coalesced for a windows guest, while this may change
1602 the behavior for a linux guest (only if it uses the RSC feature). */
1603 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
1604
1605 o_unit->tcp->th_ack = n_unit->tcp->th_ack;
1606 o_unit->tcp->th_win = n_unit->tcp->th_win;
1607
1608 memmove(seg->buf + seg->size, data, n_unit->payload);
1609 seg->size += n_unit->payload;
1610 seg->packets++;
1611 chain->stat.coalesced++;
1612 return RSC_COALESCE;
1613 }
1614}
1615
1616static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
1617 VirtioNetRscSeg *seg,
1618 const uint8_t *buf, size_t size,
1619 VirtioNetRscUnit *unit)
1620{
1621 struct ip_header *ip1, *ip2;
1622
1623 ip1 = (struct ip_header *)(unit->ip);
1624 ip2 = (struct ip_header *)(seg->unit.ip);
1625 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
1626 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1627 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1628 chain->stat.no_match++;
1629 return RSC_NO_MATCH;
1630 }
1631
1632 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1633}
1634
1635static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
1636 VirtioNetRscSeg *seg,
1637 const uint8_t *buf, size_t size,
1638 VirtioNetRscUnit *unit)
1639{
1640 struct ip6_header *ip1, *ip2;
1641
1642 ip1 = (struct ip6_header *)(unit->ip);
1643 ip2 = (struct ip6_header *)(seg->unit.ip);
1644 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
1645 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
1646 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1647 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1648 chain->stat.no_match++;
1649 return RSC_NO_MATCH;
1650 }
1651
1652 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1653}
1654
1655/* Packets with 'SYN' should bypass; packets with other control flags should
1656 * be sent only after the chain is drained, to prevent out-of-order delivery */
1657static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
1658 struct tcp_header *tcp)
1659{
1660 uint16_t tcp_hdr;
1661 uint16_t tcp_flag;
1662
1663 tcp_flag = htons(tcp->th_offset_flags);
1664 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
1665 tcp_flag &= VIRTIO_NET_TCP_FLAG;
1666 tcp_flag = htons(tcp->th_offset_flags) & 0x3F;
1667 if (tcp_flag & TH_SYN) {
1668 chain->stat.tcp_syn++;
1669 return RSC_BYPASS;
1670 }
1671
1672 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
1673 chain->stat.tcp_ctrl_drain++;
1674 return RSC_FINAL;
1675 }
1676
1677 if (tcp_hdr > sizeof(struct tcp_header)) {
1678 chain->stat.tcp_all_opt++;
1679 return RSC_FINAL;
1680 }
1681
1682 return RSC_CANDIDATE;
1683}
1684
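/* Try to coalesce the incoming segment with each cached segment of the chain:
 * RSC_FINAL flushes the cached segment and delivers the new packet as-is,
 * RSC_NO_MATCH moves on to the next cached segment, and any other result means
 * the data was merged into the cache. An empty chain just caches the packet
 * and (re)arms the purge timer. */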
1685static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
1686 NetClientState *nc,
1687 const uint8_t *buf, size_t size,
1688 VirtioNetRscUnit *unit)
1689{
1690 int ret;
1691 VirtioNetRscSeg *seg, *nseg;
1692
1693 if (QTAILQ_EMPTY(&chain->buffers)) {
1694 chain->stat.empty_cache++;
1695 virtio_net_rsc_cache_buf(chain, nc, buf, size);
1696 timer_mod(chain->drain_timer,
1697 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1698 return size;
1699 }
1700
1701 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1702 if (chain->proto == ETH_P_IP) {
1703 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
1704 } else {
1705 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
1706 }
1707
1708 if (ret == RSC_FINAL) {
1709 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1710 /* Send failed */
1711 chain->stat.final_failed++;
1712 return 0;
1713 }
1714
1715 /* Send current packet */
1716 return virtio_net_do_receive(nc, buf, size);
1717 } else if (ret == RSC_NO_MATCH) {
1718 continue;
1719 } else {
1720 /* Coalesced; set the coalesced flag to indicate the cksum must be calculated for ipv4 */
1721 seg->is_coalesced = 1;
1722 return size;
1723 }
1724 }
1725
1726 chain->stat.no_match_cache++;
1727 virtio_net_rsc_cache_buf(chain, nc, buf, size);
1728 return size;
1729}
1730
1731/* Drain a connection's data; this is to avoid out-of-order segments */
1732static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
1733 NetClientState *nc,
1734 const uint8_t *buf, size_t size,
1735 uint16_t ip_start, uint16_t ip_size,
1736 uint16_t tcp_port)
1737{
1738 VirtioNetRscSeg *seg, *nseg;
1739 uint32_t ppair1, ppair2;
1740
1741 ppair1 = *(uint32_t *)(buf + tcp_port);
1742 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1743 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
1744 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
1745 || (ppair1 != ppair2)) {
1746 continue;
1747 }
1748 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1749 chain->stat.drain_failed++;
1750 }
1751
1752 break;
1753 }
1754
1755 return virtio_net_do_receive(nc, buf, size);
1756}
1757
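/*
 * Only plain IPv4/TCP packets are coalescing candidates: no IP options, no
 * fragments, no ECN marking, and a total length consistent with the bytes
 * actually received after the virtio and Ethernet headers.
 */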
1758static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
1759 struct ip_header *ip,
1760 const uint8_t *buf, size_t size)
1761{
1762 uint16_t ip_len;
1763
1764 /* Not an ipv4 packet */
1765 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
1766 chain->stat.ip_option++;
1767 return RSC_BYPASS;
1768 }
1769
1770 /* Don't handle packets with ip option */
1771 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
1772 chain->stat.ip_option++;
1773 return RSC_BYPASS;
1774 }
1775
1776 if (ip->ip_p != IPPROTO_TCP) {
1777 chain->stat.bypass_not_tcp++;
1778 return RSC_BYPASS;
1779 }
1780
1781 /* Don't handle packets with ip fragment */
1782 if (!(htons(ip->ip_off) & IP_DF)) {
1783 chain->stat.ip_frag++;
1784 return RSC_BYPASS;
1785 }
1786
1787 /* Don't handle packets with ecn flag */
1788 if (IPTOS_ECN(ip->ip_tos)) {
1789 chain->stat.ip_ecn++;
1790 return RSC_BYPASS;
1791 }
1792
1793 ip_len = htons(ip->ip_len);
1794 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
1795 || ip_len > (size - chain->n->guest_hdr_len -
1796 sizeof(struct eth_header))) {
1797 chain->stat.ip_hacked++;
1798 return RSC_BYPASS;
1799 }
1800
1801 return RSC_CANDIDATE;
1802}
1803
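/*
 * For IPv4 the source address starts 12 bytes into the IP header, so the
 * drain offset below is the virtio plus Ethernet header length plus 12, with
 * VIRTIO_NET_IP4_ADDR_SIZE covering saddr and daddr; the port-pair offset is
 * simply where the TCP header begins.
 */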
1804static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
1805 NetClientState *nc,
1806 const uint8_t *buf, size_t size)
1807{
1808 int32_t ret;
1809 uint16_t hdr_len;
1810 VirtioNetRscUnit unit;
1811
1812 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1813
1814 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
1815 + sizeof(struct tcp_header))) {
1816 chain->stat.bypass_not_tcp++;
1817 return virtio_net_do_receive(nc, buf, size);
1818 }
1819
1820 virtio_net_rsc_extract_unit4(chain, buf, &unit);
1821 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
1822 != RSC_CANDIDATE) {
1823 return virtio_net_do_receive(nc, buf, size);
1824 }
1825
1826 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1827 if (ret == RSC_BYPASS) {
1828 return virtio_net_do_receive(nc, buf, size);
1829 } else if (ret == RSC_FINAL) {
1830 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1831 ((hdr_len + sizeof(struct eth_header)) + 12),
1832 VIRTIO_NET_IP4_ADDR_SIZE,
1833 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
1834 }
1835
1836 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1837}
1838
1839static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
1840 struct ip6_header *ip6,
1841 const uint8_t *buf, size_t size)
1842{
1843 uint16_t ip_len;
1844
1845 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
1846 != IP_HEADER_VERSION_6) {
1847 return RSC_BYPASS;
1848 }
1849
1850 /* Both extension headers and the protocol are covered by this check */
1851 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
1852 chain->stat.bypass_not_tcp++;
1853 return RSC_BYPASS;
1854 }
1855
1856 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1857 if (ip_len < sizeof(struct tcp_header) ||
1858 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
1859 - sizeof(struct ip6_header))) {
1860 chain->stat.ip_hacked++;
1861 return RSC_BYPASS;
1862 }
1863
1864 /* Don't handle packets with ecn flag */
1865 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
1866 chain->stat.ip_ecn++;
1867 return RSC_BYPASS;
1868 }
1869
1870 return RSC_CANDIDATE;
1871}
1872
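/*
 * For IPv6 the source address starts 8 bytes into the fixed header, so the
 * drain offset below is the virtio plus Ethernet header length plus 8, with
 * VIRTIO_NET_IP6_ADDR_SIZE covering both 16-byte addresses.
 */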
1873static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
1874 const uint8_t *buf, size_t size)
1875{
1876 int32_t ret;
1877 uint16_t hdr_len;
1878 VirtioNetRscChain *chain;
1879 VirtioNetRscUnit unit;
1880
1881 chain = (VirtioNetRscChain *)opq;
1882 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1883
1884 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
1885 + sizeof(struct tcp_header))) {
1886 return virtio_net_do_receive(nc, buf, size);
1887 }
1888
1889 virtio_net_rsc_extract_unit6(chain, buf, &unit);
1890 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
1891 unit.ip, buf, size)) {
1892 return virtio_net_do_receive(nc, buf, size);
1893 }
1894
1895 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1896 if (ret == RSC_BYPASS) {
1897 return virtio_net_do_receive(nc, buf, size);
1898 } else if (ret == RSC_FINAL) {
1899 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1900 ((hdr_len + sizeof(struct eth_header)) + 8),
1901 VIRTIO_NET_IP6_ADDR_SIZE,
1902 hdr_len + sizeof(struct eth_header)
1903 + sizeof(struct ip6_header));
1904 }
1905
1906 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1907}
1908
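/*
 * One chain is kept per protocol (IPv4 or IPv6).  Chains are created lazily
 * on the first matching packet and each owns a host-clock purge timer
 * (virtio_net_rsc_purge) that flushes whatever is cached after rsc_timeout.
 */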
1909static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
1910 NetClientState *nc,
1911 uint16_t proto)
1912{
1913 VirtioNetRscChain *chain;
1914
1915 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
1916 return NULL;
1917 }
1918
1919 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
1920 if (chain->proto == proto) {
1921 return chain;
1922 }
1923 }
1924
1925 chain = g_malloc(sizeof(*chain));
1926 chain->n = n;
1927 chain->proto = proto;
1928 if (proto == (uint16_t)ETH_P_IP) {
1929 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
1930 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1931 } else {
1932 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
1933 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1934 }
1935 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
1936 virtio_net_rsc_purge, chain);
1937 memset(&chain->stat, 0, sizeof(chain->stat));
1938
1939 QTAILQ_INIT(&chain->buffers);
1940 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
1941
1942 return chain;
1943}
1944
1945static ssize_t virtio_net_rsc_receive(NetClientState *nc,
1946 const uint8_t *buf,
1947 size_t size)
1948{
1949 uint16_t proto;
1950 VirtioNetRscChain *chain;
1951 struct eth_header *eth;
1952 VirtIONet *n;
1953
1954 n = qemu_get_nic_opaque(nc);
1955 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
1956 return virtio_net_do_receive(nc, buf, size);
1957 }
1958
1959 eth = (struct eth_header *)(buf + n->guest_hdr_len);
1960 proto = htons(eth->h_proto);
1961
1962 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
1963 if (chain) {
1964 chain->stat.received++;
1965 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
1966 return virtio_net_rsc_receive4(chain, nc, buf, size);
1967 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
1968 return virtio_net_rsc_receive6(chain, nc, buf, size);
1969 }
1970 }
1971 return virtio_net_do_receive(nc, buf, size);
1972}
1973
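/*
 * Top-level receive hook: the RSC path is taken only while the per-protocol
 * coalescing switches (rsc4_enabled/rsc6_enabled) are turned on; everything
 * else goes straight to virtio_net_do_receive().
 */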
1974static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
1975 size_t size)
1976{
1977 VirtIONet *n = qemu_get_nic_opaque(nc);
1978 if ((n->rsc4_enabled || n->rsc6_enabled)) {
1979 return virtio_net_rsc_receive(nc, buf, size);
1980 } else {
1981 return virtio_net_do_receive(nc, buf, size);
1982 }
1983}
1984
0c87e93e 1985static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
6243375f 1986
4e68f7a0 1987static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
6243375f 1988{
cc1f0f45 1989 VirtIONet *n = qemu_get_nic_opaque(nc);
fed699f9 1990 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
17a0ca55 1991 VirtIODevice *vdev = VIRTIO_DEVICE(n);
6243375f 1992
51b19ebe 1993 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
17a0ca55 1994 virtio_notify(vdev, q->tx_vq);
6243375f 1995
51b19ebe
PB
1996 g_free(q->async_tx.elem);
1997 q->async_tx.elem = NULL;
6243375f 1998
0c87e93e
JW
1999 virtio_queue_set_notification(q->tx_vq, 1);
2000 virtio_net_flush_tx(q);
6243375f
MM
2001}
2002
fbe78f4f 2003/* TX */
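/*
 * Pop up to tx_burst descriptors from the TX virtqueue and hand them to the
 * net layer.  When qemu_sendv_packet_async() returns 0 the element is parked
 * in async_tx, notifications are turned off and -EBUSY is returned until
 * virtio_net_tx_complete() pushes it back and restarts the flush; a malformed
 * header marks the device broken and returns -EINVAL.
 */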
0c87e93e 2004static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
fbe78f4f 2005{
0c87e93e 2006 VirtIONet *n = q->n;
17a0ca55 2007 VirtIODevice *vdev = VIRTIO_DEVICE(n);
51b19ebe 2008 VirtQueueElement *elem;
e3f30488 2009 int32_t num_packets = 0;
fed699f9 2010 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
17a0ca55 2011 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
e3f30488
AW
2012 return num_packets;
2013 }
fbe78f4f 2014
51b19ebe 2015 if (q->async_tx.elem) {
0c87e93e 2016 virtio_queue_set_notification(q->tx_vq, 0);
e3f30488 2017 return num_packets;
6243375f
MM
2018 }
2019
51b19ebe 2020 for (;;) {
bd89dd98 2021 ssize_t ret;
51b19ebe
PB
2022 unsigned int out_num;
2023 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
feb93f36 2024 struct virtio_net_hdr_mrg_rxbuf mhdr;
fbe78f4f 2025
51b19ebe
PB
2026 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2027 if (!elem) {
2028 break;
2029 }
2030
2031 out_num = elem->out_num;
2032 out_sg = elem->out_sg;
7b80d08e 2033 if (out_num < 1) {
fa5e56c2
GK
2034 virtio_error(vdev, "virtio-net header not in first element");
2035 virtqueue_detach_element(q->tx_vq, elem, 0);
2036 g_free(elem);
2037 return -EINVAL;
fbe78f4f
AL
2038 }
2039
032a74a1 2040 if (n->has_vnet_hdr) {
feb93f36
JW
2041 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2042 n->guest_hdr_len) {
fa5e56c2
GK
2043 virtio_error(vdev, "virtio-net header incorrect");
2044 virtqueue_detach_element(q->tx_vq, elem, 0);
2045 g_free(elem);
2046 return -EINVAL;
032a74a1 2047 }
1bfa316c 2048 if (n->needs_vnet_hdr_swap) {
feb93f36
JW
2049 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2050 sg2[0].iov_base = &mhdr;
2051 sg2[0].iov_len = n->guest_hdr_len;
2052 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2053 out_sg, out_num,
2054 n->guest_hdr_len, -1);
2055 if (out_num == VIRTQUEUE_MAX_SIZE) {
2056 goto drop;
7d37435b 2057 }
feb93f36
JW
2058 out_num += 1;
2059 out_sg = sg2;
7d37435b 2060 }
032a74a1 2061 }
14761f9c
MT
2062 /*
2063 * If host wants to see the guest header as is, we can
2064 * pass it on unchanged. Otherwise, copy just the parts
2065 * that host is interested in.
2066 */
2067 assert(n->host_hdr_len <= n->guest_hdr_len);
2068 if (n->host_hdr_len != n->guest_hdr_len) {
2069 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2070 out_sg, out_num,
2071 0, n->host_hdr_len);
2072 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2073 out_sg, out_num,
2074 n->guest_hdr_len, -1);
2075 out_num = sg_num;
2076 out_sg = sg;
fbe78f4f
AL
2077 }
2078
fed699f9
JW
2079 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2080 out_sg, out_num, virtio_net_tx_complete);
6243375f 2081 if (ret == 0) {
0c87e93e
JW
2082 virtio_queue_set_notification(q->tx_vq, 0);
2083 q->async_tx.elem = elem;
e3f30488 2084 return -EBUSY;
6243375f
MM
2085 }
2086
feb93f36 2087drop:
51b19ebe 2088 virtqueue_push(q->tx_vq, elem, 0);
17a0ca55 2089 virtio_notify(vdev, q->tx_vq);
51b19ebe 2090 g_free(elem);
e3f30488
AW
2091
2092 if (++num_packets >= n->tx_burst) {
2093 break;
2094 }
fbe78f4f 2095 }
e3f30488 2096 return num_packets;
fbe78f4f
AL
2097}
2098
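/*
 * Timer-based TX mitigation: the first guest kick only arms tx_timer and
 * turns notifications off; a second kick while the timer is pending cancels
 * it and flushes the queue immediately.
 */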
a697a334 2099static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
fbe78f4f 2100{
17a0ca55 2101 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2102 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
fbe78f4f 2103
283e2c2a
YB
2104 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2105 virtio_net_drop_tx_queue_data(vdev, vq);
2106 return;
2107 }
2108
783e7706 2109 /* This happens when device was stopped but VCPU wasn't. */
17a0ca55 2110 if (!vdev->vm_running) {
0c87e93e 2111 q->tx_waiting = 1;
783e7706
MT
2112 return;
2113 }
2114
0c87e93e 2115 if (q->tx_waiting) {
fbe78f4f 2116 virtio_queue_set_notification(vq, 1);
bc72ad67 2117 timer_del(q->tx_timer);
0c87e93e 2118 q->tx_waiting = 0;
fa5e56c2
GK
2119 if (virtio_net_flush_tx(q) == -EINVAL) {
2120 return;
2121 }
fbe78f4f 2122 } else {
bc72ad67
AB
2123 timer_mod(q->tx_timer,
2124 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
0c87e93e 2125 q->tx_waiting = 1;
fbe78f4f
AL
2126 virtio_queue_set_notification(vq, 0);
2127 }
2128}
2129
a697a334
AW
2130static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2131{
17a0ca55 2132 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2133 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
a697a334 2134
283e2c2a
YB
2135 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2136 virtio_net_drop_tx_queue_data(vdev, vq);
2137 return;
2138 }
2139
0c87e93e 2140 if (unlikely(q->tx_waiting)) {
a697a334
AW
2141 return;
2142 }
0c87e93e 2143 q->tx_waiting = 1;
783e7706 2144 /* This happens when device was stopped but VCPU wasn't. */
17a0ca55 2145 if (!vdev->vm_running) {
783e7706
MT
2146 return;
2147 }
a697a334 2148 virtio_queue_set_notification(vq, 0);
0c87e93e 2149 qemu_bh_schedule(q->tx_bh);
a697a334
AW
2150}
2151
fbe78f4f
AL
2152static void virtio_net_tx_timer(void *opaque)
2153{
0c87e93e
JW
2154 VirtIONetQueue *q = opaque;
2155 VirtIONet *n = q->n;
17a0ca55 2156 VirtIODevice *vdev = VIRTIO_DEVICE(n);
e8bcf842
MT
2157 /* This happens when device was stopped but BH wasn't. */
2158 if (!vdev->vm_running) {
2159 /* Make sure tx waiting is set, so we'll run when restarted. */
2160 assert(q->tx_waiting);
2161 return;
2162 }
fbe78f4f 2163
0c87e93e 2164 q->tx_waiting = 0;
fbe78f4f
AL
2165
2166 /* Just in case the driver is not ready any more */
17a0ca55 2167 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
fbe78f4f 2168 return;
17a0ca55 2169 }
fbe78f4f 2170
0c87e93e
JW
2171 virtio_queue_set_notification(q->tx_vq, 1);
2172 virtio_net_flush_tx(q);
fbe78f4f
AL
2173}
2174
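/*
 * Bottom-half TX path: flush once, reschedule straight away when a full
 * burst went out, otherwise re-enable notifications and flush again to catch
 * descriptors that were queued while notifications were off.
 */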
a697a334
AW
2175static void virtio_net_tx_bh(void *opaque)
2176{
0c87e93e
JW
2177 VirtIONetQueue *q = opaque;
2178 VirtIONet *n = q->n;
17a0ca55 2179 VirtIODevice *vdev = VIRTIO_DEVICE(n);
a697a334
AW
2180 int32_t ret;
2181
e8bcf842
MT
2182 /* This happens when device was stopped but BH wasn't. */
2183 if (!vdev->vm_running) {
2184 /* Make sure tx waiting is set, so we'll run when restarted. */
2185 assert(q->tx_waiting);
2186 return;
2187 }
783e7706 2188
0c87e93e 2189 q->tx_waiting = 0;
a697a334
AW
2190
2191 /* Just in case the driver is not ready any more */
17a0ca55 2192 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
a697a334 2193 return;
17a0ca55 2194 }
a697a334 2195
0c87e93e 2196 ret = virtio_net_flush_tx(q);
fa5e56c2
GK
2197 if (ret == -EBUSY || ret == -EINVAL) {
2198 return; /* Notification re-enable handled by tx_complete or device
2199 * broken */
a697a334
AW
2200 }
2201
2202 /* If we flush a full burst of packets, assume there are
2203 * more coming and immediately reschedule */
2204 if (ret >= n->tx_burst) {
0c87e93e
JW
2205 qemu_bh_schedule(q->tx_bh);
2206 q->tx_waiting = 1;
a697a334
AW
2207 return;
2208 }
2209
2210 /* If less than a full burst, re-enable notification and flush
2211 * anything that may have come in while we weren't looking. If
2212 * we find something, assume the guest is still active and reschedule */
0c87e93e 2213 virtio_queue_set_notification(q->tx_vq, 1);
fa5e56c2
GK
2214 ret = virtio_net_flush_tx(q);
2215 if (ret == -EINVAL) {
2216 return;
2217 } else if (ret > 0) {
0c87e93e
JW
2218 virtio_queue_set_notification(q->tx_vq, 0);
2219 qemu_bh_schedule(q->tx_bh);
2220 q->tx_waiting = 1;
a697a334
AW
2221 }
2222}
2223
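/*
 * Each queue pair gets an RX virtqueue handled by virtio_net_handle_rx and a
 * TX virtqueue driven either by the mitigation timer (tx=timer) or by a
 * bottom half (the default), as selected by net_conf.tx.
 */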
f9d6dbf0
WC
2224static void virtio_net_add_queue(VirtIONet *n, int index)
2225{
2226 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2227
1c0fbfa3
MT
2228 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2229 virtio_net_handle_rx);
9b02e161 2230
f9d6dbf0
WC
2231 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2232 n->vqs[index].tx_vq =
9b02e161
WW
2233 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2234 virtio_net_handle_tx_timer);
f9d6dbf0
WC
2235 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2236 virtio_net_tx_timer,
2237 &n->vqs[index]);
2238 } else {
2239 n->vqs[index].tx_vq =
9b02e161
WW
2240 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2241 virtio_net_handle_tx_bh);
f9d6dbf0
WC
2242 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2243 }
2244
2245 n->vqs[index].tx_waiting = 0;
2246 n->vqs[index].n = n;
2247}
2248
2249static void virtio_net_del_queue(VirtIONet *n, int index)
2250{
2251 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2252 VirtIONetQueue *q = &n->vqs[index];
2253 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2254
2255 qemu_purge_queued_packets(nc);
2256
2257 virtio_del_queue(vdev, index * 2);
2258 if (q->tx_timer) {
2259 timer_del(q->tx_timer);
2260 timer_free(q->tx_timer);
f989c30c 2261 q->tx_timer = NULL;
f9d6dbf0
WC
2262 } else {
2263 qemu_bh_delete(q->tx_bh);
f989c30c 2264 q->tx_bh = NULL;
f9d6dbf0 2265 }
f989c30c 2266 q->tx_waiting = 0;
f9d6dbf0
WC
2267 virtio_del_queue(vdev, index * 2 + 1);
2268}
2269
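/*
 * Virtqueues are laid out as RX/TX pairs at indices 2 * i and 2 * i + 1 with
 * the control queue last, so resizing means deleting the control queue,
 * adding or removing whole pairs, and then re-adding the control queue.
 */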
2270static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2271{
2272 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2273 int old_num_queues = virtio_get_num_queues(vdev);
2274 int new_num_queues = new_max_queues * 2 + 1;
2275 int i;
2276
2277 assert(old_num_queues >= 3);
2278 assert(old_num_queues % 2 == 1);
2279
2280 if (old_num_queues == new_num_queues) {
2281 return;
2282 }
2283
2284 /*
2285 * We always need to remove and add ctrl vq if
2286 * old_num_queues != new_num_queues. Remove ctrl_vq first,
20f86a75 2287 * and then we only enter one of the following two loops.
f9d6dbf0
WC
2288 */
2289 virtio_del_queue(vdev, old_num_queues - 1);
2290
2291 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2292 /* new_num_queues < old_num_queues */
2293 virtio_net_del_queue(n, i / 2);
2294 }
2295
2296 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2297 /* new_num_queues > old_num_queues */
2298 virtio_net_add_queue(n, i / 2);
2299 }
2300
2301 /* add ctrl_vq last */
2302 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2303}
2304
ec57db16 2305static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
fed699f9 2306{
f9d6dbf0
WC
2307 int max = multiqueue ? n->max_queues : 1;
2308
fed699f9 2309 n->multiqueue = multiqueue;
f9d6dbf0 2310 virtio_net_change_num_queues(n, max);
fed699f9 2311
fed699f9
JW
2312 virtio_net_set_queues(n);
2313}
2314
982b78c5 2315static int virtio_net_post_load_device(void *opaque, int version_id)
037dab2f 2316{
982b78c5
DDAG
2317 VirtIONet *n = opaque;
2318 VirtIODevice *vdev = VIRTIO_DEVICE(n);
037dab2f 2319 int i, link_down;
fbe78f4f 2320
9d8c6a25 2321 trace_virtio_net_post_load_device();
982b78c5 2322 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
95129d6f
CH
2323 virtio_vdev_has_feature(vdev,
2324 VIRTIO_F_VERSION_1));
fbe78f4f 2325
76010cb3 2326 /* MAC_TABLE_ENTRIES may be different from the saved image */
982b78c5 2327 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
76010cb3 2328 n->mac_table.in_use = 0;
b6503ed9 2329 }
0ce0e8f4 2330
982b78c5 2331 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
6c666823
MT
2332 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2333 }
2334
2335 if (peer_has_vnet_hdr(n)) {
2336 virtio_net_apply_guest_offloads(n);
2337 }
2338
5f800801
JW
2339 virtio_net_set_queues(n);
2340
2d9aba39
AW
2341 /* Find the first multicast entry in the saved MAC filter */
2342 for (i = 0; i < n->mac_table.in_use; i++) {
2343 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2344 break;
2345 }
2346 }
2347 n->mac_table.first_multi = i;
98991481
AK
2348
2349 /* nc.link_down can't be migrated, so infer link_down according
2350 * to link status bit in n->status */
5f800801
JW
2351 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2352 for (i = 0; i < n->max_queues; i++) {
2353 qemu_get_subqueue(n->nic, i)->link_down = link_down;
2354 }
98991481 2355
6c666823
MT
2356 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2357 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
9d8c6a25
DDAG
2358 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2359 QEMU_CLOCK_VIRTUAL,
2360 virtio_net_announce_timer, n);
2361 if (n->announce_timer.round) {
2362 timer_mod(n->announce_timer.tm,
2363 qemu_clock_get_ms(n->announce_timer.type));
2364 } else {
944458b6 2365 qemu_announce_timer_del(&n->announce_timer, false);
9d8c6a25 2366 }
6c666823
MT
2367 }
2368
fbe78f4f
AL
2369 return 0;
2370}
2371
982b78c5
DDAG
2372/* tx_waiting field of a VirtIONetQueue */
2373static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2374 .name = "virtio-net-queue-tx_waiting",
2375 .fields = (VMStateField[]) {
2376 VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2377 VMSTATE_END_OF_LIST()
2378 },
2379};
2380
2381static bool max_queues_gt_1(void *opaque, int version_id)
2382{
2383 return VIRTIO_NET(opaque)->max_queues > 1;
2384}
2385
2386static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2387{
2388 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2389 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2390}
2391
2392static bool mac_table_fits(void *opaque, int version_id)
2393{
2394 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2395}
2396
2397static bool mac_table_doesnt_fit(void *opaque, int version_id)
2398{
2399 return !mac_table_fits(opaque, version_id);
2400}
2401
2402/* This temporary type is shared by all the WITH_TMP methods
2403 * although only some fields are used by each.
2404 */
2405struct VirtIONetMigTmp {
2406 VirtIONet *parent;
2407 VirtIONetQueue *vqs_1;
2408 uint16_t curr_queues_1;
2409 uint8_t has_ufo;
2410 uint32_t has_vnet_hdr;
2411};
2412
2413/* The 2nd and subsequent tx_waiting flags are loaded later than
2414 * the 1st entry in the queues and only if there's more than one
2415 * entry. We use the tmp mechanism to calculate a temporary
2416 * pointer and count and also validate the count.
2417 */
2418
44b1ff31 2419static int virtio_net_tx_waiting_pre_save(void *opaque)
982b78c5
DDAG
2420{
2421 struct VirtIONetMigTmp *tmp = opaque;
2422
2423 tmp->vqs_1 = tmp->parent->vqs + 1;
2424 tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2425 if (tmp->parent->curr_queues == 0) {
2426 tmp->curr_queues_1 = 0;
2427 }
44b1ff31
DDAG
2428
2429 return 0;
982b78c5
DDAG
2430}
2431
2432static int virtio_net_tx_waiting_pre_load(void *opaque)
2433{
2434 struct VirtIONetMigTmp *tmp = opaque;
2435
2436 /* Reuse the pointer setup from save */
2437 virtio_net_tx_waiting_pre_save(opaque);
2438
2439 if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2440 error_report("virtio-net: curr_queues %x > max_queues %x",
2441 tmp->parent->curr_queues, tmp->parent->max_queues);
2442
2443 return -EINVAL;
2444 }
2445
2446 return 0; /* all good */
2447}
2448
2449static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2450 .name = "virtio-net-tx_waiting",
2451 .pre_load = virtio_net_tx_waiting_pre_load,
2452 .pre_save = virtio_net_tx_waiting_pre_save,
2453 .fields = (VMStateField[]) {
2454 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2455 curr_queues_1,
2456 vmstate_virtio_net_queue_tx_waiting,
2457 struct VirtIONetQueue),
2458 VMSTATE_END_OF_LIST()
2459 },
2460};
2461
2462/* the 'has_ufo' flag is just tested; if the incoming stream has the
2463 * flag set we need to check that we have it
2464 */
2465static int virtio_net_ufo_post_load(void *opaque, int version_id)
2466{
2467 struct VirtIONetMigTmp *tmp = opaque;
2468
2469 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2470 error_report("virtio-net: saved image requires TUN_F_UFO support");
2471 return -EINVAL;
2472 }
2473
2474 return 0;
2475}
2476
44b1ff31 2477static int virtio_net_ufo_pre_save(void *opaque)
982b78c5
DDAG
2478{
2479 struct VirtIONetMigTmp *tmp = opaque;
2480
2481 tmp->has_ufo = tmp->parent->has_ufo;
44b1ff31
DDAG
2482
2483 return 0;
982b78c5
DDAG
2484}
2485
2486static const VMStateDescription vmstate_virtio_net_has_ufo = {
2487 .name = "virtio-net-ufo",
2488 .post_load = virtio_net_ufo_post_load,
2489 .pre_save = virtio_net_ufo_pre_save,
2490 .fields = (VMStateField[]) {
2491 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2492 VMSTATE_END_OF_LIST()
2493 },
2494};
2495
2496/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2497 * flag set we need to check that we have it
2498 */
2499static int virtio_net_vnet_post_load(void *opaque, int version_id)
2500{
2501 struct VirtIONetMigTmp *tmp = opaque;
2502
2503 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2504 error_report("virtio-net: saved image requires vnet_hdr=on");
2505 return -EINVAL;
2506 }
2507
2508 return 0;
2509}
2510
44b1ff31 2511static int virtio_net_vnet_pre_save(void *opaque)
982b78c5
DDAG
2512{
2513 struct VirtIONetMigTmp *tmp = opaque;
2514
2515 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
44b1ff31
DDAG
2516
2517 return 0;
982b78c5
DDAG
2518}
2519
2520static const VMStateDescription vmstate_virtio_net_has_vnet = {
2521 .name = "virtio-net-vnet",
2522 .post_load = virtio_net_vnet_post_load,
2523 .pre_save = virtio_net_vnet_pre_save,
2524 .fields = (VMStateField[]) {
2525 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2526 VMSTATE_END_OF_LIST()
2527 },
2528};
2529
2530static const VMStateDescription vmstate_virtio_net_device = {
2531 .name = "virtio-net-device",
2532 .version_id = VIRTIO_NET_VM_VERSION,
2533 .minimum_version_id = VIRTIO_NET_VM_VERSION,
2534 .post_load = virtio_net_post_load_device,
2535 .fields = (VMStateField[]) {
2536 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
2537 VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
2538 vmstate_virtio_net_queue_tx_waiting,
2539 VirtIONetQueue),
2540 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
2541 VMSTATE_UINT16(status, VirtIONet),
2542 VMSTATE_UINT8(promisc, VirtIONet),
2543 VMSTATE_UINT8(allmulti, VirtIONet),
2544 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
2545
2546 /* Guarded pair: If it fits we load it, else we throw it away
2547 * - can happen if the source has a larger MAC table; post-load
2548 * sets flags in this case.
2549 */
2550 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
2551 0, mac_table_fits, mac_table.in_use,
2552 ETH_ALEN),
2553 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
2554 mac_table.in_use, ETH_ALEN),
2555
2556 /* Note: This is an array of uint32's that's always been saved as a
2557 * buffer; hold onto your endiannesses; it's actually used as a bitmap
2558 * but built from those uint32 words.
2559 */
2560 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
2561 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2562 vmstate_virtio_net_has_vnet),
2563 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
2564 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
2565 VMSTATE_UINT8(alluni, VirtIONet),
2566 VMSTATE_UINT8(nomulti, VirtIONet),
2567 VMSTATE_UINT8(nouni, VirtIONet),
2568 VMSTATE_UINT8(nobcast, VirtIONet),
2569 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2570 vmstate_virtio_net_has_ufo),
2571 VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
2572 vmstate_info_uint16_equal, uint16_t),
2573 VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
2574 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2575 vmstate_virtio_net_tx_waiting),
2576 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
2577 has_ctrl_guest_offloads),
2578 VMSTATE_END_OF_LIST()
2579 },
2580};
2581
eb6b6c12 2582static NetClientInfo net_virtio_info = {
f394b2e2 2583 .type = NET_CLIENT_DRIVER_NIC,
eb6b6c12
MM
2584 .size = sizeof(NICState),
2585 .can_receive = virtio_net_can_receive,
2586 .receive = virtio_net_receive,
eb6b6c12 2587 .link_status_changed = virtio_net_set_link_status,
b1be4280 2588 .query_rx_filter = virtio_net_query_rxfilter,
b2c929f0 2589 .announce = virtio_net_announce,
eb6b6c12
MM
2590};
2591
f56a1247
MT
2592static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
2593{
17a0ca55 2594 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2595 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
f56a1247 2596 assert(n->vhost_started);
ed8b4afe 2597 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
f56a1247
MT
2598}
2599
2600static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
2601 bool mask)
2602{
17a0ca55 2603 VirtIONet *n = VIRTIO_NET(vdev);
fed699f9 2604 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
f56a1247 2605 assert(n->vhost_started);
ed8b4afe 2606 vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
f56a1247
MT
2607 vdev, idx, mask);
2608}
2609
019a3edb 2610static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
fbe78f4f 2611{
0cd09c3a 2612 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
a93e599d 2613
ba550851
SG
2614 n->config_size = virtio_feature_get_config_size(feature_sizes,
2615 host_features);
17ec5a86
FK
2616}
2617
8a253ec2
FK
2618void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
2619 const char *type)
2620{
2621 /*
2622 * The name can be NULL; the netclient name will then be type.x.
2623 */
2624 assert(type != NULL);
2625
9e288406 2626 g_free(n->netclient_name);
9e288406 2627 g_free(n->netclient_type);
80e0090a 2628 n->netclient_name = g_strdup(name);
8a253ec2
FK
2629 n->netclient_type = g_strdup(type);
2630}
2631
e6f746b3 2632static void virtio_net_device_realize(DeviceState *dev, Error **errp)
17ec5a86 2633{
e6f746b3 2634 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
284a32f0 2635 VirtIONet *n = VIRTIO_NET(dev);
b1be4280 2636 NetClientState *nc;
284a32f0 2637 int i;
1773d9ee 2638
a93e599d 2639 if (n->net_conf.mtu) {
127833ee 2640 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
a93e599d
MC
2641 }
2642
9473939e
JB
2643 if (n->net_conf.duplex_str) {
2644 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
2645 n->net_conf.duplex = DUPLEX_HALF;
2646 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
2647 n->net_conf.duplex = DUPLEX_FULL;
2648 } else {
2649 error_setg(errp, "'duplex' must be 'half' or 'full'");
2650 }
2651 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2652 } else {
2653 n->net_conf.duplex = DUPLEX_UNKNOWN;
2654 }
2655
2656 if (n->net_conf.speed < SPEED_UNKNOWN) {
2657 error_setg(errp, "'speed' must be between 0 and INT_MAX");
2658 } else if (n->net_conf.speed >= 0) {
2659 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2660 }
2661
da3e8a23 2662 virtio_net_set_config_size(n, n->host_features);
284a32f0 2663 virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
fbe78f4f 2664
1c0fbfa3
MT
2665 /*
2666 * We set a lower limit on RX queue size to what it always was.
2667 * Guests that want a smaller ring can always resize it without
2668 * help from us (using virtio 1 and up).
2669 */
2670 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
2671 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
5f997fd1 2672 !is_power_of_2(n->net_conf.rx_queue_size)) {
1c0fbfa3
MT
2673 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
2674 "must be a power of 2 between %d and %d.",
2675 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
2676 VIRTQUEUE_MAX_SIZE);
2677 virtio_cleanup(vdev);
2678 return;
2679 }
2680
9b02e161
WW
2681 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
2682 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
2683 !is_power_of_2(n->net_conf.tx_queue_size)) {
2684 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
2685 "must be a power of 2 between %d and %d",
2686 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
2687 VIRTQUEUE_MAX_SIZE);
2688 virtio_cleanup(vdev);
2689 return;
2690 }
2691
575a1c0e 2692 n->max_queues = MAX(n->nic_conf.peers.queues, 1);
87b3bd1c 2693 if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
7e0e736e 2694 error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
631b22ea 2695 "must be a positive integer less than %d.",
87b3bd1c 2696 n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
7e0e736e
JW
2697 virtio_cleanup(vdev);
2698 return;
2699 }
f6b26cf2 2700 n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
fed699f9 2701 n->curr_queues = 1;
1773d9ee 2702 n->tx_timeout = n->net_conf.txtimer;
a697a334 2703
1773d9ee
FK
2704 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
2705 && strcmp(n->net_conf.tx, "bh")) {
0765691e
MA
2706 warn_report("virtio-net: "
2707 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
2708 n->net_conf.tx);
2709 error_printf("Defaulting to \"bh\"");
a697a334
AW
2710 }
2711
2eef278b
MT
2712 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
2713 n->net_conf.tx_queue_size);
9b02e161 2714
da51a335 2715 for (i = 0; i < n->max_queues; i++) {
f9d6dbf0 2716 virtio_net_add_queue(n, i);
a697a334 2717 }
da51a335 2718
17a0ca55 2719 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
1773d9ee
FK
2720 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
2721 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
554c97dd 2722 n->status = VIRTIO_NET_S_LINK_UP;
9d8c6a25
DDAG
2723 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2724 QEMU_CLOCK_VIRTUAL,
2725 virtio_net_announce_timer, n);
b2c929f0 2726 n->announce_timer.round = 0;
fbe78f4f 2727
8a253ec2
FK
2728 if (n->netclient_type) {
2729 /*
2730 * Happens when virtio_net_set_netclient_name has been called.
2731 */
2732 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2733 n->netclient_type, n->netclient_name, n);
2734 } else {
2735 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
284a32f0 2736 object_get_typename(OBJECT(dev)), dev->id, n);
8a253ec2
FK
2737 }
2738
6e371ab8
MT
2739 peer_test_vnet_hdr(n);
2740 if (peer_has_vnet_hdr(n)) {
fed699f9 2741 for (i = 0; i < n->max_queues; i++) {
d6085e3a 2742 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
fed699f9 2743 }
6e371ab8
MT
2744 n->host_hdr_len = sizeof(struct virtio_net_hdr);
2745 } else {
2746 n->host_hdr_len = 0;
2747 }
eb6b6c12 2748
1773d9ee 2749 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
96d5e201 2750
fed699f9 2751 n->vqs[0].tx_waiting = 0;
1773d9ee 2752 n->tx_burst = n->net_conf.txburst;
bb9d17f8 2753 virtio_net_set_mrg_rx_bufs(n, 0, 0);
002437cd 2754 n->promisc = 1; /* for compatibility */
fbe78f4f 2755
7267c094 2756 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
b6503ed9 2757
7267c094 2758 n->vlans = g_malloc0(MAX_VLAN >> 3);
f21c0ed9 2759
b1be4280
AK
2760 nc = qemu_get_queue(n->nic);
2761 nc->rxfilter_notify_enabled = 1;
2762
2974e916 2763 QTAILQ_INIT(&n->rsc_chains);
284a32f0 2764 n->qdev = dev;
17ec5a86
FK
2765}
2766
306ec6c3 2767static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
17ec5a86 2768{
306ec6c3
AF
2769 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2770 VirtIONet *n = VIRTIO_NET(dev);
f9d6dbf0 2771 int i, max_queues;
17ec5a86
FK
2772
2773 /* This will stop vhost backend if appropriate. */
2774 virtio_net_set_status(vdev, 0);
2775
9e288406
MA
2776 g_free(n->netclient_name);
2777 n->netclient_name = NULL;
2778 g_free(n->netclient_type);
2779 n->netclient_type = NULL;
8a253ec2 2780
17ec5a86
FK
2781 g_free(n->mac_table.macs);
2782 g_free(n->vlans);
2783
f9d6dbf0
WC
2784 max_queues = n->multiqueue ? n->max_queues : 1;
2785 for (i = 0; i < max_queues; i++) {
2786 virtio_net_del_queue(n, i);
17ec5a86
FK
2787 }
2788
944458b6 2789 qemu_announce_timer_del(&n->announce_timer, false);
17ec5a86
FK
2790 g_free(n->vqs);
2791 qemu_del_nic(n->nic);
2974e916 2792 virtio_net_rsc_cleanup(n);
6a1a8cc7 2793 virtio_cleanup(vdev);
17ec5a86
FK
2794}
2795
2796static void virtio_net_instance_init(Object *obj)
2797{
2798 VirtIONet *n = VIRTIO_NET(obj);
2799
2800 /*
2801 * The default config_size is sizeof(struct virtio_net_config).
2802 * Can be overridden with virtio_net_set_config_size.
2803 */
2804 n->config_size = sizeof(struct virtio_net_config);
aa4197c3
GA
2805 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
2806 "bootindex", "/ethernet-phy@0",
2807 DEVICE(n), NULL);
17ec5a86
FK
2808}
2809
44b1ff31 2810static int virtio_net_pre_save(void *opaque)
4d45dcfb
HP
2811{
2812 VirtIONet *n = opaque;
2813
2814 /* At this point, backend must be stopped, otherwise
2815 * it might keep writing to memory. */
2816 assert(!n->vhost_started);
44b1ff31
DDAG
2817
2818 return 0;
4d45dcfb
HP
2819}
2820
2821static const VMStateDescription vmstate_virtio_net = {
2822 .name = "virtio-net",
2823 .minimum_version_id = VIRTIO_NET_VM_VERSION,
2824 .version_id = VIRTIO_NET_VM_VERSION,
2825 .fields = (VMStateField[]) {
2826 VMSTATE_VIRTIO_DEVICE,
2827 VMSTATE_END_OF_LIST()
2828 },
2829 .pre_save = virtio_net_pre_save,
2830};
290c2428 2831
17ec5a86 2832static Property virtio_net_properties[] = {
127833ee
JB
2833 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
2834 VIRTIO_NET_F_CSUM, true),
2835 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
87108bb2 2836 VIRTIO_NET_F_GUEST_CSUM, true),
127833ee
JB
2837 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
2838 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
87108bb2 2839 VIRTIO_NET_F_GUEST_TSO4, true),
127833ee 2840 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
87108bb2 2841 VIRTIO_NET_F_GUEST_TSO6, true),
127833ee 2842 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
87108bb2 2843 VIRTIO_NET_F_GUEST_ECN, true),
127833ee 2844 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
87108bb2 2845 VIRTIO_NET_F_GUEST_UFO, true),
127833ee 2846 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
87108bb2 2847 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
127833ee 2848 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
87108bb2 2849 VIRTIO_NET_F_HOST_TSO4, true),
127833ee 2850 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
87108bb2 2851 VIRTIO_NET_F_HOST_TSO6, true),
127833ee 2852 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
87108bb2 2853 VIRTIO_NET_F_HOST_ECN, true),
127833ee 2854 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
87108bb2 2855 VIRTIO_NET_F_HOST_UFO, true),
127833ee 2856 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
87108bb2 2857 VIRTIO_NET_F_MRG_RXBUF, true),
127833ee 2858 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
87108bb2 2859 VIRTIO_NET_F_STATUS, true),
127833ee 2860 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
87108bb2 2861 VIRTIO_NET_F_CTRL_VQ, true),
127833ee 2862 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
87108bb2 2863 VIRTIO_NET_F_CTRL_RX, true),
127833ee 2864 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
87108bb2 2865 VIRTIO_NET_F_CTRL_VLAN, true),
127833ee 2866 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
87108bb2 2867 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
127833ee 2868 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
87108bb2 2869 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
127833ee 2870 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
87108bb2 2871 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
127833ee 2872 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
2974e916
YB
2873 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
2874 VIRTIO_NET_F_RSC_EXT, false),
2875 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
2876 VIRTIO_NET_RSC_DEFAULT_INTERVAL),
17ec5a86
FK
2877 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
2878 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
87108bb2 2879 TX_TIMER_INTERVAL),
17ec5a86
FK
2880 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
2881 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
1c0fbfa3
MT
2882 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
2883 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
9b02e161
WW
2884 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
2885 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
a93e599d 2886 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
75ebec11
MC
2887 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
2888 true),
9473939e
JB
2889 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
2890 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
17ec5a86
FK
2891 DEFINE_PROP_END_OF_LIST(),
2892};
2893
2894static void virtio_net_class_init(ObjectClass *klass, void *data)
2895{
2896 DeviceClass *dc = DEVICE_CLASS(klass);
2897 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
e6f746b3 2898
17ec5a86 2899 dc->props = virtio_net_properties;
290c2428 2900 dc->vmsd = &vmstate_virtio_net;
125ee0ed 2901 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
e6f746b3 2902 vdc->realize = virtio_net_device_realize;
306ec6c3 2903 vdc->unrealize = virtio_net_device_unrealize;
17ec5a86
FK
2904 vdc->get_config = virtio_net_get_config;
2905 vdc->set_config = virtio_net_set_config;
2906 vdc->get_features = virtio_net_get_features;
2907 vdc->set_features = virtio_net_set_features;
2908 vdc->bad_features = virtio_net_bad_features;
2909 vdc->reset = virtio_net_reset;
2910 vdc->set_status = virtio_net_set_status;
2911 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
2912 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
2a083ffd 2913 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
982b78c5 2914 vdc->vmsd = &vmstate_virtio_net_device;
17ec5a86
FK
2915}
2916
2917static const TypeInfo virtio_net_info = {
2918 .name = TYPE_VIRTIO_NET,
2919 .parent = TYPE_VIRTIO_DEVICE,
2920 .instance_size = sizeof(VirtIONet),
2921 .instance_init = virtio_net_instance_init,
2922 .class_init = virtio_net_class_init,
2923};
2924
2925static void virtio_register_types(void)
2926{
2927 type_register_static(&virtio_net_info);
2928}
2929
2930type_init(virtio_register_types)