hw/net/virtio-net.c

   1 /*
   2  * Virtio Network Device
   3  *
   4  * Copyright IBM, Corp. 2007
   5  *
   6  * Authors:
   7  *  Anthony Liguori   <aliguori@us.ibm.com>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2.  See
  10  * the COPYING file in the top-level directory.
  11  *
  12  */
  13
  14 #include "qemu/osdep.h"
  15 #include "qemu/iov.h"
  16 #include "hw/virtio/virtio.h"
  17 #include "net/net.h"
  18 #include "net/checksum.h"
  19 #include "net/tap.h"
  20 #include "qemu/error-report.h"
  21 #include "qemu/timer.h"
  22 #include "hw/virtio/virtio-net.h"
  23 #include "net/vhost_net.h"
  24 #include "hw/virtio/virtio-bus.h"
  25 #include "qapi/qmp/qjson.h"
  26 #include "qapi-event.h"
  27 #include "hw/virtio/virtio-access.h"
  28
  29 #define VIRTIO_NET_VM_VERSION    11
  30
  31 #define MAC_TABLE_ENTRIES    64
  32 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
  33
  34 /* previously fixed value */
  35 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
  36 /* for now, only allow larger queues; with virtio-1, guest can downsize */
  37 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
  38
  39 /*
  40  * Calculate the number of bytes up to and including the given 'field' of
  41  * 'container'.
  42  */
  43 #define endof(container, field) \
  44     (offsetof(container, field) + sizeof(((container *)0)->field))
  45
  46 typedef struct VirtIOFeature {
  47     uint32_t flags;
  48     size_t end;
  49 } VirtIOFeature;
  50
  51 static VirtIOFeature feature_sizes[] = {
  52     {.flags = 1 << VIRTIO_NET_F_MAC,
  53      .end = endof(struct virtio_net_config, mac)},
  54     {.flags = 1 << VIRTIO_NET_F_STATUS,
  55      .end = endof(struct virtio_net_config, status)},
  56     {.flags = 1 << VIRTIO_NET_F_MQ,
  57      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
  58     {}
  59 };
  60
  61 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
  62 {
  63     VirtIONet *n = qemu_get_nic_opaque(nc);
  64
  65     return &n->vqs[nc->queue_index];
  66 }
  67
  68 static int vq2q(int queue_index)
  69 {
  70     return queue_index / 2;
  71 }
  72
  73 /* TODO
  74  * - we could suppress RX interrupt if we were so inclined.
  75  */
  76
  77 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
  78 {
  79     VirtIONet *n = VIRTIO_NET(vdev);
  80     struct virtio_net_config netcfg;
  81
  82     virtio_stw_p(vdev, &netcfg.status, n->status);
  83     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
  84     memcpy(netcfg.mac, n->mac, ETH_ALEN);
  85     memcpy(config, &netcfg, n->config_size);
  86 }
  87
  88 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
  89 {
  90     VirtIONet *n = VIRTIO_NET(vdev);
  91     struct virtio_net_config netcfg = {};
  92
  93     memcpy(&netcfg, config, n->config_size);
  94
  95     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
  96         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
  97         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
  98         memcpy(n->mac, netcfg.mac, ETH_ALEN);
  99         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 100     }
 101 }
 102
 103 static bool virtio_net_started(VirtIONet *n, uint8_t status)
 104 {
 105     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 106     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 107         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
 108 }
 109
 110 static void virtio_net_announce_timer(void *opaque)
 111 {
 112     VirtIONet *n = opaque;
 113     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 114
 115     n->announce_counter--;
 116     n->status |= VIRTIO_NET_S_ANNOUNCE;
 117     virtio_notify_config(vdev);
 118 }
 119
 120 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 121 {
 122     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 123     NetClientState *nc = qemu_get_queue(n->nic);
 124     int queues = n->multiqueue ? n->max_queues : 1;
 125
 126     if (!get_vhost_net(nc->peer)) {
 127         return;
 128     }
 129
 130     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
 131         !!n->vhost_started) {
 132         return;
 133     }
 134     if (!n->vhost_started) {
 135         int r, i;
 136
 137         if (n->needs_vnet_hdr_swap) {
 138             error_report("backend does not support %s vnet headers; "
 139                          "falling back on userspace virtio",
 140                          virtio_is_big_endian(vdev) ? "BE" : "LE");
 141             return;
 142         }
 143
 144         /* Any packets outstanding? Purge them to avoid touching rings
 145          * when vhost is running.
 146          */
 147         for (i = 0;  i < queues; i++) {
 148             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
 149
 150             /* Purge both directions: TX and RX. */
 151             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
 152             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
 153         }
 154
 155         n->vhost_started = 1;
 156         r = vhost_net_start(vdev, n->nic->ncs, queues);
 157         if (r < 0) {
 158             error_report("unable to start vhost net: %d: "
 159                          "falling back on userspace virtio", -r);
 160             n->vhost_started = 0;
 161         }
 162     } else {
 163         vhost_net_stop(vdev, n->nic->ncs, queues);
 164         n->vhost_started = 0;
 165     }
 166 }
 167
 168 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
 169                                           NetClientState *peer,
 170                                           bool enable)
 171 {
 172     if (virtio_is_big_endian(vdev)) {
 173         return qemu_set_vnet_be(peer, enable);
 174     } else {
 175         return qemu_set_vnet_le(peer, enable);
 176     }
 177 }
 178
 179 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
 180                                        int queues, bool enable)
 181 {
 182     int i;
 183
 184     for (i = 0; i < queues; i++) {
 185         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
 186             enable) {
 187             while (--i >= 0) {
 188                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
 189             }
 190
 191             return true;
 192         }
 193     }
 194
 195     return false;
 196 }
 197
 198 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
 199 {
 200     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 201     int queues = n->multiqueue ? n->max_queues : 1;
 202
 203     if (virtio_net_started(n, status)) {
 204         /* Before using the device, we tell the network backend about the
 205          * endianness to use when parsing vnet headers. If the backend
 206          * can't do it, we fallback onto fixing the headers in the core
 207          * virtio-net code.
 208          */
 209         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
 210                                                             queues, true);
 211     } else if (virtio_net_started(n, vdev->status)) {
 212         /* After using the device, we need to reset the network backend to
 213          * the default (guest native endianness), otherwise the guest may
 214          * lose network connectivity if it is rebooted into a different
 215          * endianness.
 216          */
 217         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
 218     }
 219 }
 220
 221 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
 222 {
 223     unsigned int dropped = virtqueue_drop_all(vq);
 224     if (dropped) {
 225         virtio_notify(vdev, vq);
 226     }
 227 }
 228
 229 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 230 {
 231     VirtIONet *n = VIRTIO_NET(vdev);
 232     VirtIONetQueue *q;
 233     int i;
 234     uint8_t queue_status;
 235
 236     virtio_net_vnet_endian_status(n, status);
 237     virtio_net_vhost_status(n, status);
 238
 239     for (i = 0; i < n->max_queues; i++) {
 240         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
 241         bool queue_started;
 242         q = &n->vqs[i];
 243
 244         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
 245             queue_status = 0;
 246         } else {
 247             queue_status = status;
 248         }
 249         queue_started =
 250             virtio_net_started(n, queue_status) && !n->vhost_started;
 251
 252         if (queue_started) {
 253             qemu_flush_queued_packets(ncs);
 254         }
 255
 256         if (!q->tx_waiting) {
 257             continue;
 258         }
 259
 260         if (queue_started) {
 261             if (q->tx_timer) {
 262                 timer_mod(q->tx_timer,
 263                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
 264             } else {
 265                 qemu_bh_schedule(q->tx_bh);
 266             }
 267         } else {
 268             if (q->tx_timer) {
 269                 timer_del(q->tx_timer);
 270             } else {
 271                 qemu_bh_cancel(q->tx_bh);
 272             }
 273             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
 274                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK)) {
 275                 /* if tx is waiting we are likely have some packets in tx queue
 276                  * and disabled notification */
 277                 q->tx_waiting = 0;
 278                 virtio_queue_set_notification(q->tx_vq, 1);
 279                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
 280             }
 281         }
 282     }
 283 }
 284
 285 static void virtio_net_set_link_status(NetClientState *nc)
 286 {
 287     VirtIONet *n = qemu_get_nic_opaque(nc);
 288     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 289     uint16_t old_status = n->status;
 290
 291     if (nc->link_down)
 292         n->status &= ~VIRTIO_NET_S_LINK_UP;
 293     else
 294         n->status |= VIRTIO_NET_S_LINK_UP;
 295
 296     if (n->status != old_status)
 297         virtio_notify_config(vdev);
 298
 299     virtio_net_set_status(vdev, vdev->status);
 300 }
 301
 302 static void rxfilter_notify(NetClientState *nc)
 303 {
 304     VirtIONet *n = qemu_get_nic_opaque(nc);
 305
 306     if (nc->rxfilter_notify_enabled) {
 307         gchar *path = object_get_canonical_path(OBJECT(n->qdev));
 308         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
 309                                               n->netclient_name, path, &error_abort);
 310         g_free(path);
 311
 312         /* disable event notification to avoid events flooding */
 313         nc->rxfilter_notify_enabled = 0;
 314     }
 315 }
 316
 317 static intList *get_vlan_table(VirtIONet *n)
 318 {
 319     intList *list, *entry;
 320     int i, j;
 321
 322     list = NULL;
 323     for (i = 0; i < MAX_VLAN >> 5; i++) {
 324         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
 325             if (n->vlans[i] & (1U << j)) {
 326                 entry = g_malloc0(sizeof(*entry));
 327                 entry->value = (i << 5) + j;
 328                 entry->next = list;
 329                 list = entry;
 330             }
 331         }
 332     }
 333
 334     return list;
 335 }
 336
 337 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
 338 {
 339     VirtIONet *n = qemu_get_nic_opaque(nc);
 340     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 341     RxFilterInfo *info;
 342     strList *str_list, *entry;
 343     int i;
 344
 345     info = g_malloc0(sizeof(*info));
 346     info->name = g_strdup(nc->name);
 347     info->promiscuous = n->promisc;
 348
 349     if (n->nouni) {
 350         info->unicast = RX_STATE_NONE;
 351     } else if (n->alluni) {
 352         info->unicast = RX_STATE_ALL;
 353     } else {
 354         info->unicast = RX_STATE_NORMAL;
 355     }
 356
 357     if (n->nomulti) {
 358         info->multicast = RX_STATE_NONE;
 359     } else if (n->allmulti) {
 360         info->multicast = RX_STATE_ALL;
 361     } else {
 362         info->multicast = RX_STATE_NORMAL;
 363     }
 364
 365     info->broadcast_allowed = n->nobcast;
 366     info->multicast_overflow = n->mac_table.multi_overflow;
 367     info->unicast_overflow = n->mac_table.uni_overflow;
 368
 369     info->main_mac = qemu_mac_strdup_printf(n->mac);
 370
 371     str_list = NULL;
 372     for (i = 0; i < n->mac_table.first_multi; i++) {
 373         entry = g_malloc0(sizeof(*entry));
 374         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
 375         entry->next = str_list;
 376         str_list = entry;
 377     }
 378     info->unicast_table = str_list;
 379
 380     str_list = NULL;
 381     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
 382         entry = g_malloc0(sizeof(*entry));
 383         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
 384         entry->next = str_list;
 385         str_list = entry;
 386     }
 387     info->multicast_table = str_list;
 388     info->vlan_table = get_vlan_table(n);
 389
 390     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
 391         info->vlan = RX_STATE_ALL;
 392     } else if (!info->vlan_table) {
 393         info->vlan = RX_STATE_NONE;
 394     } else {
 395         info->vlan = RX_STATE_NORMAL;
 396     }
 397
 398     /* enable event notification after query */
 399     nc->rxfilter_notify_enabled = 1;
 400
 401     return info;
 402 }
 403
 404 static void virtio_net_reset(VirtIODevice *vdev)
 405 {
 406     VirtIONet *n = VIRTIO_NET(vdev);
 407
 408     /* Reset back to compatibility mode */
 409     n->promisc = 1;
 410     n->allmulti = 0;
 411     n->alluni = 0;
 412     n->nomulti = 0;
 413     n->nouni = 0;
 414     n->nobcast = 0;
 415     /* multiqueue is disabled by default */
 416     n->curr_queues = 1;
 417     timer_del(n->announce_timer);
 418     n->announce_counter = 0;
 419     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
 420
 421     /* Flush any MAC and VLAN filter table state */
 422     n->mac_table.in_use = 0;
 423     n->mac_table.first_multi = 0;
 424     n->mac_table.multi_overflow = 0;
 425     n->mac_table.uni_overflow = 0;
 426     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 427     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
 428     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 429     memset(n->vlans, 0, MAX_VLAN >> 3);
 430 }
 431
 432 static void peer_test_vnet_hdr(VirtIONet *n)
 433 {
 434     NetClientState *nc = qemu_get_queue(n->nic);
 435     if (!nc->peer) {
 436         return;
 437     }
 438
 439     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 440 }
 441
 442 static int peer_has_vnet_hdr(VirtIONet *n)
 443 {
 444     return n->has_vnet_hdr;
 445 }
 446
 447 static int peer_has_ufo(VirtIONet *n)
 448 {
 449     if (!peer_has_vnet_hdr(n))
 450         return 0;
 451
 452     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 453
 454     return n->has_ufo;
 455 }
 456
 457 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
 458                                        int version_1)
 459 {
 460     int i;
 461     NetClientState *nc;
 462
 463     n->mergeable_rx_bufs = mergeable_rx_bufs;
 464
 465     if (version_1) {
 466         n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 467     } else {
 468         n->guest_hdr_len = n->mergeable_rx_bufs ?
 469             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
 470             sizeof(struct virtio_net_hdr);
 471     }
 472
 473     for (i = 0; i < n->max_queues; i++) {
 474         nc = qemu_get_subqueue(n->nic, i);
 475
 476         if (peer_has_vnet_hdr(n) &&
 477             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
 478             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
 479             n->host_hdr_len = n->guest_hdr_len;
 480         }
 481     }
 482 }
 483
 484 static int peer_attach(VirtIONet *n, int index)
 485 {
 486     NetClientState *nc = qemu_get_subqueue(n->nic, index);
 487
 488     if (!nc->peer) {
 489         return 0;
 490     }
 491
 492     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 493         vhost_set_vring_enable(nc->peer, 1);
 494     }
 495
 496     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
 497         return 0;
 498     }
 499
 500     return tap_enable(nc->peer);
 501 }
 502
 503 static int peer_detach(VirtIONet *n, int index)
 504 {
 505     NetClientState *nc = qemu_get_subqueue(n->nic, index);
 506
 507     if (!nc->peer) {
 508         return 0;
 509     }
 510
 511     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 512         vhost_set_vring_enable(nc->peer, 0);
 513     }
 514
 515     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
 516         return 0;
 517     }
 518
 519     return tap_disable(nc->peer);
 520 }
 521
 522 static void virtio_net_set_queues(VirtIONet *n)
 523 {
 524     int i;
 525     int r;
 526
 527     if (n->nic->peer_deleted) {
 528         return;
 529     }
 530
 531     for (i = 0; i < n->max_queues; i++) {
 532         if (i < n->curr_queues) {
 533             r = peer_attach(n, i);
 534             assert(!r);
 535         } else {
 536             r = peer_detach(n, i);
 537             assert(!r);
 538         }
 539     }
 540 }
 541
 542 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 543
 544 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
 545                                         Error **errp)
 546 {
 547     VirtIONet *n = VIRTIO_NET(vdev);
 548     NetClientState *nc = qemu_get_queue(n->nic);
 549
 550     /* Firstly sync all virtio-net possible supported features */
 551     features |= n->host_features;
 552
 553     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 554
 555     if (!peer_has_vnet_hdr(n)) {
 556         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
 557         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 558         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 559         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
 560
 561         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
 562         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
 563         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
 564         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 565     }
 566
 567     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
 568         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
 569         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
 570     }
 571
 572     if (!get_vhost_net(nc->peer)) {
 573         return features;
 574     }
 575     return vhost_net_get_features(get_vhost_net(nc->peer), features);
 576 }
 577
 578 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
 579 {
 580     uint64_t features = 0;
 581
 582     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
 583      * but also these: */
 584     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 585     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
 586     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 587     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 588     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
 589
 590     return features;
 591 }
 592
 593 static void virtio_net_apply_guest_offloads(VirtIONet *n)
 594 {
 595     qemu_set_offload(qemu_get_queue(n->nic)->peer,
 596             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
 597             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 598             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 599             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
 600             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
 601 }
 602
 603 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
 604 {
 605     static const uint64_t guest_offloads_mask =
 606         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 607         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 608         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 609         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
 610         (1ULL << VIRTIO_NET_F_GUEST_UFO);
 611
 612     return guest_offloads_mask & features;
 613 }
 614
 615 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
 616 {
 617     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 618     return virtio_net_guest_offloads_by_features(vdev->guest_features);
 619 }
 620
 621 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 622 {
 623     VirtIONet *n = VIRTIO_NET(vdev);
 624     int i;
 625
 626     virtio_net_set_multiqueue(n,
 627                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
 628
 629     virtio_net_set_mrg_rx_bufs(n,
 630                                virtio_has_feature(features,
 631                                                   VIRTIO_NET_F_MRG_RXBUF),
 632                                virtio_has_feature(features,
 633                                                   VIRTIO_F_VERSION_1));
 634
 635     if (n->has_vnet_hdr) {
 636         n->curr_guest_offloads =
 637             virtio_net_guest_offloads_by_features(features);
 638         virtio_net_apply_guest_offloads(n);
 639     }
 640
 641     for (i = 0;  i < n->max_queues; i++) {
 642         NetClientState *nc = qemu_get_subqueue(n->nic, i);
 643
 644         if (!get_vhost_net(nc->peer)) {
 645             continue;
 646         }
 647         vhost_net_ack_features(get_vhost_net(nc->peer), features);
 648     }
 649
 650     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
 651         memset(n->vlans, 0, MAX_VLAN >> 3);
 652     } else {
 653         memset(n->vlans, 0xff, MAX_VLAN >> 3);
 654     }
 655 }
 656
 657 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
 658                                      struct iovec *iov, unsigned int iov_cnt)
 659 {
 660     uint8_t on;
 661     size_t s;
 662     NetClientState *nc = qemu_get_queue(n->nic);
 663
 664     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
 665     if (s != sizeof(on)) {
 666         return VIRTIO_NET_ERR;
 667     }
 668
 669     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
 670         n->promisc = on;
 671     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
 672         n->allmulti = on;
 673     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
 674         n->alluni = on;
 675     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
 676         n->nomulti = on;
 677     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
 678         n->nouni = on;
 679     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
 680         n->nobcast = on;
 681     } else {
 682         return VIRTIO_NET_ERR;
 683     }
 684
 685     rxfilter_notify(nc);
 686
 687     return VIRTIO_NET_OK;
 688 }
 689
 690 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
 691                                      struct iovec *iov, unsigned int iov_cnt)
 692 {
 693     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 694     uint64_t offloads;
 695     size_t s;
 696
 697     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
 698         return VIRTIO_NET_ERR;
 699     }
 700
 701     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
 702     if (s != sizeof(offloads)) {
 703         return VIRTIO_NET_ERR;
 704     }
 705
 706     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
 707         uint64_t supported_offloads;
 708
 709         if (!n->has_vnet_hdr) {
 710             return VIRTIO_NET_ERR;
 711         }
 712
 713         supported_offloads = virtio_net_supported_guest_offloads(n);
 714         if (offloads & ~supported_offloads) {
 715             return VIRTIO_NET_ERR;
 716         }
 717
 718         n->curr_guest_offloads = offloads;
 719         virtio_net_apply_guest_offloads(n);
 720
 721         return VIRTIO_NET_OK;
 722     } else {
 723         return VIRTIO_NET_ERR;
 724     }
 725 }
 726
 727 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
 728                                  struct iovec *iov, unsigned int iov_cnt)
 729 {
 730     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 731     struct virtio_net_ctrl_mac mac_data;
 732     size_t s;
 733     NetClientState *nc = qemu_get_queue(n->nic);
 734
 735     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
 736         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
 737             return VIRTIO_NET_ERR;
 738         }
 739         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
 740         assert(s == sizeof(n->mac));
 741         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 742         rxfilter_notify(nc);
 743
 744         return VIRTIO_NET_OK;
 745     }
 746
 747     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
 748         return VIRTIO_NET_ERR;
 749     }
 750
 751     int in_use = 0;
 752     int first_multi = 0;
 753     uint8_t uni_overflow = 0;
 754     uint8_t multi_overflow = 0;
 755     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
 756
 757     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
 758                    sizeof(mac_data.entries));
 759     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
 760     if (s != sizeof(mac_data.entries)) {
 761         goto error;
 762     }
 763     iov_discard_front(&iov, &iov_cnt, s);
 764
 765     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
 766         goto error;
 767     }
 768
 769     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
 770         s = iov_to_buf(iov, iov_cnt, 0, macs,
 771                        mac_data.entries * ETH_ALEN);
 772         if (s != mac_data.entries * ETH_ALEN) {
 773             goto error;
 774         }
 775         in_use += mac_data.entries;
 776     } else {
 777         uni_overflow = 1;
 778     }
 779
 780     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
 781
 782     first_multi = in_use;
 783
 784     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
 785                    sizeof(mac_data.entries));
 786     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
 787     if (s != sizeof(mac_data.entries)) {
 788         goto error;
 789     }
 790
 791     iov_discard_front(&iov, &iov_cnt, s);
 792
 793     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
 794         goto error;
 795     }
 796
 797     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
 798         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
 799                        mac_data.entries * ETH_ALEN);
 800         if (s != mac_data.entries * ETH_ALEN) {
 801             goto error;
 802         }
 803         in_use += mac_data.entries;
 804     } else {
 805         multi_overflow = 1;
 806     }
 807
 808     n->mac_table.in_use = in_use;
 809     n->mac_table.first_multi = first_multi;
 810     n->mac_table.uni_overflow = uni_overflow;
 811     n->mac_table.multi_overflow = multi_overflow;
 812     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
 813     g_free(macs);
 814     rxfilter_notify(nc);
 815
 816     return VIRTIO_NET_OK;
 817
 818 error:
 819     g_free(macs);
 820     return VIRTIO_NET_ERR;
 821 }
 822
 823 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
 824                                         struct iovec *iov, unsigned int iov_cnt)
 825 {
 826     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 827     uint16_t vid;
 828     size_t s;
 829     NetClientState *nc = qemu_get_queue(n->nic);
 830
 831     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
 832     vid = virtio_lduw_p(vdev, &vid);
 833     if (s != sizeof(vid)) {
 834         return VIRTIO_NET_ERR;
 835     }
 836
 837     if (vid >= MAX_VLAN)
 838         return VIRTIO_NET_ERR;
 839
 840     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
 841         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
 842     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
 843         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
 844     else
 845         return VIRTIO_NET_ERR;
 846
 847     rxfilter_notify(nc);
 848
 849     return VIRTIO_NET_OK;
 850 }
 851
 852 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
 853                                       struct iovec *iov, unsigned int iov_cnt)
 854 {
 855     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
 856         n->status & VIRTIO_NET_S_ANNOUNCE) {
 857         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
 858         if (n->announce_counter) {
 859             timer_mod(n->announce_timer,
 860                       qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
 861                       self_announce_delay(n->announce_counter));
 862         }
 863         return VIRTIO_NET_OK;
 864     } else {
 865         return VIRTIO_NET_ERR;
 866     }
 867 }
 868
 869 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
 870                                 struct iovec *iov, unsigned int iov_cnt)
 871 {
 872     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 873     struct virtio_net_ctrl_mq mq;
 874     size_t s;
 875     uint16_t queues;
 876
 877     s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
 878     if (s != sizeof(mq)) {
 879         return VIRTIO_NET_ERR;
 880     }
 881
 882     if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
 883         return VIRTIO_NET_ERR;
 884     }
 885
 886     queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
 887
 888     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
 889         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
 890         queues > n->max_queues ||
 891         !n->multiqueue) {
 892         return VIRTIO_NET_ERR;
 893     }
 894
 895     n->curr_queues = queues;
 896     /* stop the backend before changing the number of queues to avoid handling a
 897      * disabled queue */
 898     virtio_net_set_status(vdev, vdev->status);
 899     virtio_net_set_queues(n);
 900
 901     return VIRTIO_NET_OK;
 902 }
 903
 904 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 905 {
 906     VirtIONet *n = VIRTIO_NET(vdev);
 907     struct virtio_net_ctrl_hdr ctrl;
 908     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
 909     VirtQueueElement *elem;
 910     size_t s;
 911     struct iovec *iov, *iov2;
 912     unsigned int iov_cnt;
 913
 914     for (;;) {
 915         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 916         if (!elem) {
 917             break;
 918         }
 919         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
 920             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
 921             virtio_error(vdev, "virtio-net ctrl missing headers");
 922             virtqueue_detach_element(vq, elem, 0);
 923             g_free(elem);
 924             break;
 925         }
 926
 927         iov_cnt = elem->out_num;
 928         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
 929         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
 930         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
 931         if (s != sizeof(ctrl)) {
 932             status = VIRTIO_NET_ERR;
 933         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
 934             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
 935         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
 936             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
 937         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
 938             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
 939         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
 940             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
 941         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
 942             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
 943         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
 944             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
 945         }
 946
 947         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
 948         assert(s == sizeof(status));
 949
 950         virtqueue_push(vq, elem, sizeof(status));
 951         virtio_notify(vdev, vq);
 952         g_free(iov2);
 953         g_free(elem);
 954     }
 955 }
 956
 957 /* RX */
 958
 959 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
 960 {
 961     VirtIONet *n = VIRTIO_NET(vdev);
 962     int queue_index = vq2q(virtio_get_queue_index(vq));
 963
 964     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
 965 }
 966
 967 static int virtio_net_can_receive(NetClientState *nc)
 968 {
 969     VirtIONet *n = qemu_get_nic_opaque(nc);
 970     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 971     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
 972
 973     if (!vdev->vm_running) {
 974         return 0;
 975     }
 976
 977     if (nc->queue_index >= n->curr_queues) {
 978         return 0;
 979     }
 980
 981     if (!virtio_queue_ready(q->rx_vq) ||
 982         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
 983         return 0;
 984     }
 985
 986     return 1;
 987 }
 988
 989 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
 990 {
 991     VirtIONet *n = q->n;
 992     if (virtio_queue_empty(q->rx_vq) ||
 993         (n->mergeable_rx_bufs &&
 994          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
 995         virtio_queue_set_notification(q->rx_vq, 1);
 996
 997         /* To avoid a race condition where the guest has made some buffers
 998          * available after the above check but before notification was
 999          * enabled, check for available buffers again.
1000          */
1001         if (virtio_queue_empty(q->rx_vq) ||
1002             (n->mergeable_rx_bufs &&
1003              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1004             return 0;
1005         }
1006     }
1007
1008     virtio_queue_set_notification(q->rx_vq, 0);
1009     return 1;
1010 }
1011
1012 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1013 {
1014     virtio_tswap16s(vdev, &hdr->hdr_len);
1015     virtio_tswap16s(vdev, &hdr->gso_size);
1016     virtio_tswap16s(vdev, &hdr->csum_start);
1017     virtio_tswap16s(vdev, &hdr->csum_offset);
1018 }
1019
1020 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1021  * it never finds out that the packets don't have valid checksums.  This
1022  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1023  * fix this with Xen but it hasn't appeared in an upstream release of
1024  * dhclient yet.
1025  *
1026  * To avoid breaking existing guests, we catch udp packets and add
1027  * checksums.  This is terrible but it's better than hacking the guest
1028  * kernels.
1029  *
1030  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1031  * we should provide a mechanism to disable it to avoid polluting the host
1032  * cache.
1033  */
1034 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1035                                         uint8_t *buf, size_t size)
1036 {
1037     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1038         (size > 27 && size < 1500) && /* normal sized MTU */
1039         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1040         (buf[23] == 17) && /* ip.protocol == UDP */
1041         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1042         net_checksum_calculate(buf, size);
1043         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1044     }
1045 }
1046
1047 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1048                            const void *buf, size_t size)
1049 {
1050     if (n->has_vnet_hdr) {
1051         /* FIXME this cast is evil */
1052         void *wbuf = (void *)buf;
1053         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1054                                     size - n->host_hdr_len);
1055
1056         if (n->needs_vnet_hdr_swap) {
1057             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1058         }
1059         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1060     } else {
1061         struct virtio_net_hdr hdr = {
1062             .flags = 0,
1063             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1064         };
1065         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1066     }
1067 }
1068
1069 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1070 {
1071     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1072     static const uint8_t vlan[] = {0x81, 0x00};
1073     uint8_t *ptr = (uint8_t *)buf;
1074     int i;
1075
1076     if (n->promisc)
1077         return 1;
1078
1079     ptr += n->host_hdr_len;
1080
1081     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1082         int vid = lduw_be_p(ptr + 14) & 0xfff;
1083         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1084             return 0;
1085     }
1086
1087     if (ptr[0] & 1) { // multicast
1088         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1089             return !n->nobcast;
1090         } else if (n->nomulti) {
1091             return 0;
1092         } else if (n->allmulti || n->mac_table.multi_overflow) {
1093             return 1;
1094         }
1095
1096         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1097             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1098                 return 1;
1099             }
1100         }
1101     } else { // unicast
1102         if (n->nouni) {
1103             return 0;
1104         } else if (n->alluni || n->mac_table.uni_overflow) {
1105             return 1;
1106         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1107             return 1;
1108         }
1109
1110         for (i = 0; i < n->mac_table.first_multi; i++) {
1111             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1112                 return 1;
1113             }
1114         }
1115     }
1116
1117     return 0;
1118 }
1119
1120 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1121 {
1122     VirtIONet *n = qemu_get_nic_opaque(nc);
1123     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1124     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1125     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1126     struct virtio_net_hdr_mrg_rxbuf mhdr;
1127     unsigned mhdr_cnt = 0;
1128     size_t offset, i, guest_offset;
1129
1130     if (!virtio_net_can_receive(nc)) {
1131         return -1;
1132     }
1133
1134     /* hdr_len refers to the header we supply to the guest */
1135     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1136         return 0;
1137     }
1138
1139     if (!receive_filter(n, buf, size))
1140         return size;
1141
1142     offset = i = 0;
1143
1144     while (offset < size) {
1145         VirtQueueElement *elem;
1146         int len, total;
1147         const struct iovec *sg;
1148
1149         total = 0;
1150
1151         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1152         if (!elem) {
1153             if (i) {
1154                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1155                              "i %zd mergeable %d offset %zd, size %zd, "
1156                              "guest hdr len %zd, host hdr len %zd "
1157                              "guest features 0x%" PRIx64,
1158                              i, n->mergeable_rx_bufs, offset, size,
1159                              n->guest_hdr_len, n->host_hdr_len,
1160                              vdev->guest_features);
1161             }
1162             return -1;
1163         }
1164
1165         if (elem->in_num < 1) {
1166             virtio_error(vdev,
1167                          "virtio-net receive queue contains no in buffers");
1168             virtqueue_detach_element(q->rx_vq, elem, 0);
1169             g_free(elem);
1170             return -1;
1171         }
1172
1173         sg = elem->in_sg;
1174         if (i == 0) {
1175             assert(offset == 0);
1176             if (n->mergeable_rx_bufs) {
1177                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1178                                     sg, elem->in_num,
1179                                     offsetof(typeof(mhdr), num_buffers),
1180                                     sizeof(mhdr.num_buffers));
1181             }
1182
1183             receive_header(n, sg, elem->in_num, buf, size);
1184             offset = n->host_hdr_len;
1185             total += n->guest_hdr_len;
1186             guest_offset = n->guest_hdr_len;
1187         } else {
1188             guest_offset = 0;
1189         }
1190
1191         /* copy in packet.  ugh */
1192         len = iov_from_buf(sg, elem->in_num, guest_offset,
1193                            buf + offset, size - offset);
1194         total += len;
1195         offset += len;
1196         /* If buffers can't be merged, at this point we
1197          * must have consumed the complete packet.
1198          * Otherwise, drop it. */
1199         if (!n->mergeable_rx_bufs && offset < size) {
1200             virtqueue_unpop(q->rx_vq, elem, total);
1201             g_free(elem);
1202             return size;
1203         }
1204
1205         /* signal other side */
1206         virtqueue_fill(q->rx_vq, elem, total, i++);
1207         g_free(elem);
1208     }
1209
1210     if (mhdr_cnt) {
1211         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1212         iov_from_buf(mhdr_sg, mhdr_cnt,
1213                      0,
1214                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1215     }
1216
1217     virtqueue_flush(q->rx_vq, i);
1218     virtio_notify(vdev, q->rx_vq);
1219
1220     return size;
1221 }
1222
1223 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
1224
1225 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
1226 {
1227     VirtIONet *n = qemu_get_nic_opaque(nc);
1228     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1229     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1230
1231     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
1232     virtio_notify(vdev, q->tx_vq);
1233
1234     g_free(q->async_tx.elem);
1235     q->async_tx.elem = NULL;
1236
1237     virtio_queue_set_notification(q->tx_vq, 1);
1238     virtio_net_flush_tx(q);
1239 }
1240
1241 /* TX */
/*
 * Send pending packets from the TX virtqueue of @q to the backend.
 *
 * Returns the number of elements completed in this call (at most
 * n->tx_burst; 0 if the driver is not ready or a transmit is still
 * pending), -EINVAL if a malformed element was found (reported through
 * virtio_error()), or -EBUSY if qemu_sendv_packet_async() returned 0 and
 * the element was parked in q->async_tx.elem for virtio_net_tx_complete().
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* A previous transmit has not completed yet; do not start another. */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        /*
         * sg:  vector with the guest-only part of the header cut out.
         * sg2: byte-swapped header copy in slot 0 followed by the payload.
         */
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            /* The element must contain at least a full guest header. */
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /*
                 * Swap the local header copy and build a new vector that
                 * uses it in place of the header bytes in guest memory.
                 */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                /* Payload filled every remaining slot: drop the packet. */
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            /* First host_hdr_len bytes of header, then everything after
             * the guest header. */
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /*
             * Keep the element until virtio_net_tx_complete() runs; it
             * pushes the element and re-enables notification.
             */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

        /* Reached both after a send attempt and when a packet is dropped. */
drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
1336
1337 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
1338 {
1339     VirtIONet *n = VIRTIO_NET(vdev);
1340     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
1341
1342     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
1343         virtio_net_drop_tx_queue_data(vdev, vq);
1344         return;
1345     }
1346
1347     /* This happens when device was stopped but VCPU wasn't. */
1348     if (!vdev->vm_running) {
1349         q->tx_waiting = 1;
1350         return;
1351     }
1352
1353     if (q->tx_waiting) {
1354         virtio_queue_set_notification(vq, 1);
1355         timer_del(q->tx_timer);
1356         q->tx_waiting = 0;
1357         if (virtio_net_flush_tx(q) == -EINVAL) {
1358             return;
1359         }
1360     } else {
1361         timer_mod(q->tx_timer,
1362                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
1363         q->tx_waiting = 1;
1364         virtio_queue_set_notification(vq, 0);
1365     }
1366 }
1367
1368 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
1369 {
1370     VirtIONet *n = VIRTIO_NET(vdev);
1371     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
1372
1373     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
1374         virtio_net_drop_tx_queue_data(vdev, vq);
1375         return;
1376     }
1377
1378     if (unlikely(q->tx_waiting)) {
1379         return;
1380     }
1381     q->tx_waiting = 1;
1382     /* This happens when device was stopped but VCPU wasn't. */
1383     if (!vdev->vm_running) {
1384         return;
1385     }
1386     virtio_queue_set_notification(vq, 0);
1387     qemu_bh_schedule(q->tx_bh);
1388 }
1389
1390 static void virtio_net_tx_timer(void *opaque)
1391 {
1392     VirtIONetQueue *q = opaque;
1393     VirtIONet *n = q->n;
1394     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1395     /* This happens when device was stopped but BH wasn't. */
1396     if (!vdev->vm_running) {
1397         /* Make sure tx waiting is set, so we'll run when restarted. */
1398         assert(q->tx_waiting);
1399         return;
1400     }
1401
1402     q->tx_waiting = 0;
1403
1404     /* Just in case the driver is not ready on more */
1405     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1406         return;
1407     }
1408
1409     virtio_queue_set_notification(q->tx_vq, 1);
1410     virtio_net_flush_tx(q);
1411 }
1412
1413 static void virtio_net_tx_bh(void *opaque)
1414 {
1415     VirtIONetQueue *q = opaque;
1416     VirtIONet *n = q->n;
1417     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1418     int32_t ret;
1419
1420     /* This happens when device was stopped but BH wasn't. */
1421     if (!vdev->vm_running) {
1422         /* Make sure tx waiting is set, so we'll run when restarted. */
1423         assert(q->tx_waiting);
1424         return;
1425     }
1426
1427     q->tx_waiting = 0;
1428
1429     /* Just in case the driver is not ready on more */
1430     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
1431         return;
1432     }
1433
1434     ret = virtio_net_flush_tx(q);
1435     if (ret == -EBUSY || ret == -EINVAL) {
1436         return; /* Notification re-enable handled by tx_complete or device
1437                  * broken */
1438     }
1439
1440     /* If we flush a full burst of packets, assume there are
1441      * more coming and immediately reschedule */
1442     if (ret >= n->tx_burst) {
1443         qemu_bh_schedule(q->tx_bh);
1444         q->tx_waiting = 1;
1445         return;
1446     }
1447
1448     /* If less than a full burst, re-enable notification and flush
1449      * anything that may have come in while we weren't looking.  If
1450      * we find something, assume the guest is still active and reschedule */
1451     virtio_queue_set_notification(q->tx_vq, 1);
1452     ret = virtio_net_flush_tx(q);
1453     if (ret == -EINVAL) {
1454         return;
1455     } else if (ret > 0) {
1456         virtio_queue_set_notification(q->tx_vq, 0);
1457         qemu_bh_schedule(q->tx_bh);
1458         q->tx_waiting = 1;
1459     }
1460 }
1461
1462 static void virtio_net_add_queue(VirtIONet *n, int index)
1463 {
1464     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1465
1466     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
1467                                            virtio_net_handle_rx);
1468     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
1469         n->vqs[index].tx_vq =
1470             virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
1471         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
1472                                               virtio_net_tx_timer,
1473                                               &n->vqs[index]);
1474     } else {
1475         n->vqs[index].tx_vq =
1476             virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
1477         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
1478     }
1479
1480     n->vqs[index].tx_waiting = 0;
1481     n->vqs[index].n = n;
1482 }
1483
1484 static void virtio_net_del_queue(VirtIONet *n, int index)
1485 {
1486     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1487     VirtIONetQueue *q = &n->vqs[index];
1488     NetClientState *nc = qemu_get_subqueue(n->nic, index);
1489
1490     qemu_purge_queued_packets(nc);
1491
1492     virtio_del_queue(vdev, index * 2);
1493     if (q->tx_timer) {
1494         timer_del(q->tx_timer);
1495         timer_free(q->tx_timer);
1496     } else {
1497         qemu_bh_delete(q->tx_bh);
1498     }
1499     virtio_del_queue(vdev, index * 2 + 1);
1500 }
1501
1502 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
1503 {
1504     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1505     int old_num_queues = virtio_get_num_queues(vdev);
1506     int new_num_queues = new_max_queues * 2 + 1;
1507     int i;
1508
1509     assert(old_num_queues >= 3);
1510     assert(old_num_queues % 2 == 1);
1511
1512     if (old_num_queues == new_num_queues) {
1513         return;
1514     }
1515
1516     /*
1517      * We always need to remove and add ctrl vq if
1518      * old_num_queues != new_num_queues. Remove ctrl_vq first,
1519      * and then we only enter one of the following too loops.
1520      */
1521     virtio_del_queue(vdev, old_num_queues - 1);
1522
1523     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
1524         /* new_num_queues < old_num_queues */
1525         virtio_net_del_queue(n, i / 2);
1526     }
1527
1528     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
1529         /* new_num_queues > old_num_queues */
1530         virtio_net_add_queue(n, i / 2);
1531     }
1532
1533     /* add ctrl_vq last */
1534     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
1535 }
1536
1537 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
1538 {
1539     int max = multiqueue ? n->max_queues : 1;
1540
1541     n->multiqueue = multiqueue;
1542     virtio_net_change_num_queues(n, max);
1543
1544     virtio_net_set_queues(n);
1545 }
1546
1547 static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f)
1548 {
1549     VirtIONet *n = VIRTIO_NET(vdev);
1550     int i;
1551
1552     qemu_put_buffer(f, n->mac, ETH_ALEN);
1553     qemu_put_be32(f, n->vqs[0].tx_waiting);
1554     qemu_put_be32(f, n->mergeable_rx_bufs);
1555     qemu_put_be16(f, n->status);
1556     qemu_put_byte(f, n->promisc);
1557     qemu_put_byte(f, n->allmulti);
1558     qemu_put_be32(f, n->mac_table.in_use);
1559     qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
1560     qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
1561     qemu_put_be32(f, n->has_vnet_hdr);
1562     qemu_put_byte(f, n->mac_table.multi_overflow);
1563     qemu_put_byte(f, n->mac_table.uni_overflow);
1564     qemu_put_byte(f, n->alluni);
1565     qemu_put_byte(f, n->nomulti);
1566     qemu_put_byte(f, n->nouni);
1567     qemu_put_byte(f, n->nobcast);
1568     qemu_put_byte(f, n->has_ufo);
1569     if (n->max_queues > 1) {
1570         qemu_put_be16(f, n->max_queues);
1571         qemu_put_be16(f, n->curr_queues);
1572         for (i = 1; i < n->curr_queues; i++) {
1573             qemu_put_be32(f, n->vqs[i].tx_waiting);
1574         }
1575     }
1576
1577     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1578         qemu_put_be64(f, n->curr_guest_offloads);
1579     }
1580 }
1581
1582 static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
1583                                   int version_id)
1584 {
1585     VirtIONet *n = VIRTIO_NET(vdev);
1586     int i, link_down;
1587
1588     qemu_get_buffer(f, n->mac, ETH_ALEN);
1589     n->vqs[0].tx_waiting = qemu_get_be32(f);
1590
1591     virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f),
1592                                virtio_vdev_has_feature(vdev,
1593                                                        VIRTIO_F_VERSION_1));
1594
1595     n->status = qemu_get_be16(f);
1596
1597     n->promisc = qemu_get_byte(f);
1598     n->allmulti = qemu_get_byte(f);
1599
1600     n->mac_table.in_use = qemu_get_be32(f);
1601     /* MAC_TABLE_ENTRIES may be different from the saved image */
1602     if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
1603         qemu_get_buffer(f, n->mac_table.macs,
1604                         n->mac_table.in_use * ETH_ALEN);
1605     } else {
1606         int64_t i;
1607
1608         /* Overflow detected - can happen if source has a larger MAC table.
1609          * We simply set overflow flag so there's no need to maintain the
1610          * table of addresses, discard them all.
1611          * Note: 64 bit math to avoid integer overflow.
1612          */
1613         for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) {
1614             qemu_get_byte(f);
1615         }
1616         n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
1617         n->mac_table.in_use = 0;
1618     }
1619
1620     qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
1621
1622     if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
1623         error_report("virtio-net: saved image requires vnet_hdr=on");
1624         return -1;
1625     }
1626
1627     n->mac_table.multi_overflow = qemu_get_byte(f);
1628     n->mac_table.uni_overflow = qemu_get_byte(f);
1629
1630     n->alluni = qemu_get_byte(f);
1631     n->nomulti = qemu_get_byte(f);
1632     n->nouni = qemu_get_byte(f);
1633     n->nobcast = qemu_get_byte(f);
1634
1635     if (qemu_get_byte(f) && !peer_has_ufo(n)) {
1636         error_report("virtio-net: saved image requires TUN_F_UFO support");
1637         return -1;
1638     }
1639
1640     if (n->max_queues > 1) {
1641         if (n->max_queues != qemu_get_be16(f)) {
1642             error_report("virtio-net: different max_queues ");
1643             return -1;
1644         }
1645
1646         n->curr_queues = qemu_get_be16(f);
1647         if (n->curr_queues > n->max_queues) {
1648             error_report("virtio-net: curr_queues %x > max_queues %x",
1649                          n->curr_queues, n->max_queues);
1650             return -1;
1651         }
1652         for (i = 1; i < n->curr_queues; i++) {
1653             n->vqs[i].tx_waiting = qemu_get_be32(f);
1654         }
1655     }
1656
1657     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1658         n->curr_guest_offloads = qemu_get_be64(f);
1659     } else {
1660         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
1661     }
1662
1663     if (peer_has_vnet_hdr(n)) {
1664         virtio_net_apply_guest_offloads(n);
1665     }
1666
1667     virtio_net_set_queues(n);
1668
1669     /* Find the first multicast entry in the saved MAC filter */
1670     for (i = 0; i < n->mac_table.in_use; i++) {
1671         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
1672             break;
1673         }
1674     }
1675     n->mac_table.first_multi = i;
1676
1677     /* nc.link_down can't be migrated, so infer link_down according
1678      * to link status bit in n->status */
1679     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
1680     for (i = 0; i < n->max_queues; i++) {
1681         qemu_get_subqueue(n->nic, i)->link_down = link_down;
1682     }
1683
1684     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
1685         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
1686         n->announce_counter = SELF_ANNOUNCE_ROUNDS;
1687         timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
1688     }
1689
1690     return 0;
1691 }
1692
/*
 * Net client callbacks for the virtio-net NIC.  This table is passed to
 * qemu_new_nic() when the device is realized.
 */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
};
1701
1702 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
1703 {
1704     VirtIONet *n = VIRTIO_NET(vdev);
1705     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
1706     assert(n->vhost_started);
1707     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
1708 }
1709
1710 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
1711                                            bool mask)
1712 {
1713     VirtIONet *n = VIRTIO_NET(vdev);
1714     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
1715     assert(n->vhost_started);
1716     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
1717                              vdev, idx, mask);
1718 }
1719
1720 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
1721 {
1722     int i, config_size = 0;
1723     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
1724     for (i = 0; feature_sizes[i].flags != 0; i++) {
1725         if (host_features & feature_sizes[i].flags) {
1726             config_size = MAX(feature_sizes[i].end, config_size);
1727         }
1728     }
1729     n->config_size = config_size;
1730 }
1731
1732 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
1733                                    const char *type)
1734 {
1735     /*
1736      * The name can be NULL, the netclient name will be type.x.
1737      */
1738     assert(type != NULL);
1739
1740     g_free(n->netclient_name);
1741     g_free(n->netclient_type);
1742     n->netclient_name = g_strdup(name);
1743     n->netclient_type = g_strdup(type);
1744 }
1745
/*
 * Realize callback: validate the configuration, create the virtqueues and
 * the NIC, and put the device into its initial state.
 *
 * On an invalid rx_queue_size or queue count an error is set in @errp, the
 * virtio device is cleaned up and the function returns early.
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    /* One queue pair per peer queue, at least one; plus the control queue. */
    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    /*
     * NOTE(review): the message says "less than" (VIRTIO_QUEUE_MAX - 1) / 2,
     * but the check above appears to accept exactly that value - confirm
     * whether the wording should be "no greater than".
     */
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    /* An unrecognised tx= value is only reported; "bh" is used instead. */
    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        error_report("virtio-net: "
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                     n->net_conf.tx);
        error_report("Defaulting to \"bh\"");
    }

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    /* The control queue is added after all RX/TX pairs. */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                     virtio_net_announce_timer, n);

    if (n->netclient_type) {
        /*
         * Happens when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    /*
     * If the peer supports a vnet header, enable it on every sub-queue and
     * account for its size in host_hdr_len.
     */
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0);
    n->promisc = 1; /* for compatibility */

    /* RX filter state: MAC table and one bit per VLAN id. */
    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    n->qdev = dev;
}
1840
1841 static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
1842 {
1843     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1844     VirtIONet *n = VIRTIO_NET(dev);
1845     int i, max_queues;
1846
1847     /* This will stop vhost backend if appropriate. */
1848     virtio_net_set_status(vdev, 0);
1849
1850     g_free(n->netclient_name);
1851     n->netclient_name = NULL;
1852     g_free(n->netclient_type);
1853     n->netclient_type = NULL;
1854
1855     g_free(n->mac_table.macs);
1856     g_free(n->vlans);
1857
1858     max_queues = n->multiqueue ? n->max_queues : 1;
1859     for (i = 0; i < max_queues; i++) {
1860         virtio_net_del_queue(n, i);
1861     }
1862
1863     timer_del(n->announce_timer);
1864     timer_free(n->announce_timer);
1865     g_free(n->vqs);
1866     qemu_del_nic(n->nic);
1867     virtio_cleanup(vdev);
1868 }
1869
1870 static void virtio_net_instance_init(Object *obj)
1871 {
1872     VirtIONet *n = VIRTIO_NET(obj);
1873
1874     /*
1875      * The default config_size is sizeof(struct virtio_net_config).
1876      * Can be overriden with virtio_net_set_config_size.
1877      */
1878     n->config_size = sizeof(struct virtio_net_config);
1879     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
1880                                   "bootindex", "/ethernet-phy@0",
1881                                   DEVICE(n), NULL);
1882 }
1883
1884 static void virtio_net_pre_save(void *opaque)
1885 {
1886     VirtIONet *n = opaque;
1887
1888     /* At this point, backend must be stopped, otherwise
1889      * it might keep writing to memory. */
1890     assert(!n->vhost_started);
1891 }
1892
1893 static const VMStateDescription vmstate_virtio_net = {
1894     .name = "virtio-net",
1895     .minimum_version_id = VIRTIO_NET_VM_VERSION,
1896     .version_id = VIRTIO_NET_VM_VERSION,
1897     .fields = (VMStateField[]) {
1898         VMSTATE_VIRTIO_DEVICE,
1899         VMSTATE_END_OF_LIST()
1900     },
1901     .pre_save = virtio_net_pre_save,
1902 };
1903
1904 static Property virtio_net_properties[] = {
1905     DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true),
1906     DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features,
1907                     VIRTIO_NET_F_GUEST_CSUM, true),
1908     DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
1909     DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features,
1910                     VIRTIO_NET_F_GUEST_TSO4, true),
1911     DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features,
1912                     VIRTIO_NET_F_GUEST_TSO6, true),
1913     DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features,
1914                     VIRTIO_NET_F_GUEST_ECN, true),
1915     DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features,
1916                     VIRTIO_NET_F_GUEST_UFO, true),
1917     DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features,
1918                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
1919     DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features,
1920                     VIRTIO_NET_F_HOST_TSO4, true),
1921     DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features,
1922                     VIRTIO_NET_F_HOST_TSO6, true),
1923     DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features,
1924                     VIRTIO_NET_F_HOST_ECN, true),
1925     DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features,
1926                     VIRTIO_NET_F_HOST_UFO, true),
1927     DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features,
1928                     VIRTIO_NET_F_MRG_RXBUF, true),
1929     DEFINE_PROP_BIT("status", VirtIONet, host_features,
1930                     VIRTIO_NET_F_STATUS, true),
1931     DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features,
1932                     VIRTIO_NET_F_CTRL_VQ, true),
1933     DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features,
1934                     VIRTIO_NET_F_CTRL_RX, true),
1935     DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features,
1936                     VIRTIO_NET_F_CTRL_VLAN, true),
1937     DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features,
1938                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
1939     DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features,
1940                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
1941     DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features,
1942                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
1943     DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
1944     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
1945     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
1946                        TX_TIMER_INTERVAL),
1947     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
1948     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
1949     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
1950                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
1951     DEFINE_PROP_END_OF_LIST(),
1952 };
1953
1954 static void virtio_net_class_init(ObjectClass *klass, void *data)
1955 {
1956     DeviceClass *dc = DEVICE_CLASS(klass);
1957     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
1958
1959     dc->props = virtio_net_properties;
1960     dc->vmsd = &vmstate_virtio_net;
1961     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1962     vdc->realize = virtio_net_device_realize;
1963     vdc->unrealize = virtio_net_device_unrealize;
1964     vdc->get_config = virtio_net_get_config;
1965     vdc->set_config = virtio_net_set_config;
1966     vdc->get_features = virtio_net_get_features;
1967     vdc->set_features = virtio_net_set_features;
1968     vdc->bad_features = virtio_net_bad_features;
1969     vdc->reset = virtio_net_reset;
1970     vdc->set_status = virtio_net_set_status;
1971     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
1972     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
1973     vdc->load = virtio_net_load_device;
1974     vdc->save = virtio_net_save_device;
1975     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
1976 }
1977
1978 static const TypeInfo virtio_net_info = {
1979     .name = TYPE_VIRTIO_NET,
1980     .parent = TYPE_VIRTIO_DEVICE,
1981     .instance_size = sizeof(VirtIONet),
1982     .instance_init = virtio_net_instance_init,
1983     .class_init = virtio_net_class_init,
1984 };
1985
1986 static void virtio_register_types(void)
1987 {
1988     type_register_static(&virtio_net_info);
1989 }
1990
1991 type_init(virtio_register_types)