hw/net/virtio-net.c

   1 /*
   2  * Virtio Network Device
   3  *
   4  * Copyright IBM, Corp. 2007
   5  *
   6  * Authors:
   7  *  Anthony Liguori   <aliguori@us.ibm.com>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2.  See
  10  * the COPYING file in the top-level directory.
  11  *
  12  */
  13
  14 #include "qemu/osdep.h"
  15 #include "qemu/iov.h"
  16 #include "hw/virtio/virtio.h"
  17 #include "net/net.h"
  18 #include "net/checksum.h"
  19 #include "net/tap.h"
  20 #include "qemu/error-report.h"
  21 #include "qemu/timer.h"
  22 #include "hw/virtio/virtio-net.h"
  23 #include "net/vhost_net.h"
  24 #include "hw/virtio/virtio-bus.h"
  25 #include "qapi/qmp/qjson.h"
  26 #include "qapi-event.h"
  27 #include "hw/virtio/virtio-access.h"
  28 #include "migration/misc.h"
  29
/* Device state version number (presumably the migration/vmstate version;
 * its consumer is not visible in this part of the file - confirm). */
#define VIRTIO_NET_VM_VERSION    11

/* Number of ETH_ALEN-byte slots in the MAC filter table; the table buffer is
 * sized as MAC_TABLE_ENTRIES * ETH_ALEN throughout this file. */
#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
  39
/*
 * Calculate the number of bytes up to and including the given 'field' of
 * 'container', i.e. the offset of the first byte following 'field'.
 *
 * 'container' must be a type name: it is used both as the offsetof() type
 * and as the target of a pointer cast.  The null pointer is only an operand
 * of sizeof and is never dereferenced.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof(((container *)0)->field))
  46
/* Pairs a feature bit mask with a byte offset into the config structure. */
typedef struct VirtIOFeature {
    uint32_t flags; /* feature mask, built as 1 << VIRTIO_NET_F_* below */
    size_t end;     /* endof() the config field associated with 'flags' */
} VirtIOFeature;
  51
  52 static VirtIOFeature feature_sizes[] = {
  53     {.flags = 1 << VIRTIO_NET_F_MAC,
  54      .end = endof(struct virtio_net_config, mac)},
  55     {.flags = 1 << VIRTIO_NET_F_STATUS,
  56      .end = endof(struct virtio_net_config, status)},
  57     {.flags = 1 << VIRTIO_NET_F_MQ,
  58      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
  59     {.flags = 1 << VIRTIO_NET_F_MTU,
  60      .end = endof(struct virtio_net_config, mtu)},
  61     {}
  62 };
  63
  64 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
  65 {
  66     VirtIONet *n = qemu_get_nic_opaque(nc);
  67
  68     return &n->vqs[nc->queue_index];
  69 }
  70
  71 static int vq2q(int queue_index)
  72 {
  73     return queue_index / 2;
  74 }
  75
  76 /* TODO
  77  * - we could suppress RX interrupt if we were so inclined.
  78  */
  79
  80 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
  81 {
  82     VirtIONet *n = VIRTIO_NET(vdev);
  83     struct virtio_net_config netcfg;
  84
  85     virtio_stw_p(vdev, &netcfg.status, n->status);
  86     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
  87     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
  88     memcpy(netcfg.mac, n->mac, ETH_ALEN);
  89     memcpy(config, &netcfg, n->config_size);
  90 }
  91
  92 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
  93 {
  94     VirtIONet *n = VIRTIO_NET(vdev);
  95     struct virtio_net_config netcfg = {};
  96
  97     memcpy(&netcfg, config, n->config_size);
  98
  99     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
 100         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
 101         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
 102         memcpy(n->mac, netcfg.mac, ETH_ALEN);
 103         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 104     }
 105 }
 106
 107 static bool virtio_net_started(VirtIONet *n, uint8_t status)
 108 {
 109     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 110     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 111         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
 112 }
 113
 114 static void virtio_net_announce_timer(void *opaque)
 115 {
 116     VirtIONet *n = opaque;
 117     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 118
 119     n->announce_counter--;
 120     n->status |= VIRTIO_NET_S_ANNOUNCE;
 121     virtio_notify_config(vdev);
 122 }
 123
 124 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 125 {
 126     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 127     NetClientState *nc = qemu_get_queue(n->nic);
 128     int queues = n->multiqueue ? n->max_queues : 1;
 129
 130     if (!get_vhost_net(nc->peer)) {
 131         return;
 132     }
 133
 134     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
 135         !!n->vhost_started) {
 136         return;
 137     }
 138     if (!n->vhost_started) {
 139         int r, i;
 140
 141         if (n->needs_vnet_hdr_swap) {
 142             error_report("backend does not support %s vnet headers; "
 143                          "falling back on userspace virtio",
 144                          virtio_is_big_endian(vdev) ? "BE" : "LE");
 145             return;
 146         }
 147
 148         /* Any packets outstanding? Purge them to avoid touching rings
 149          * when vhost is running.
 150          */
 151         for (i = 0;  i < queues; i++) {
 152             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
 153
 154             /* Purge both directions: TX and RX. */
 155             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
 156             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
 157         }
 158
 159         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
 160             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
 161             if (r < 0) {
 162                 error_report("%uBytes MTU not supported by the backend",
 163                              n->net_conf.mtu);
 164
 165                 return;
 166             }
 167         }
 168
 169         n->vhost_started = 1;
 170         r = vhost_net_start(vdev, n->nic->ncs, queues);
 171         if (r < 0) {
 172             error_report("unable to start vhost net: %d: "
 173                          "falling back on userspace virtio", -r);
 174             n->vhost_started = 0;
 175         }
 176     } else {
 177         vhost_net_stop(vdev, n->nic->ncs, queues);
 178         n->vhost_started = 0;
 179     }
 180 }
 181
 182 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
 183                                           NetClientState *peer,
 184                                           bool enable)
 185 {
 186     if (virtio_is_big_endian(vdev)) {
 187         return qemu_set_vnet_be(peer, enable);
 188     } else {
 189         return qemu_set_vnet_le(peer, enable);
 190     }
 191 }
 192
 193 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
 194                                        int queues, bool enable)
 195 {
 196     int i;
 197
 198     for (i = 0; i < queues; i++) {
 199         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
 200             enable) {
 201             while (--i >= 0) {
 202                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
 203             }
 204
 205             return true;
 206         }
 207     }
 208
 209     return false;
 210 }
 211
 212 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
 213 {
 214     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 215     int queues = n->multiqueue ? n->max_queues : 1;
 216
 217     if (virtio_net_started(n, status)) {
 218         /* Before using the device, we tell the network backend about the
 219          * endianness to use when parsing vnet headers. If the backend
 220          * can't do it, we fallback onto fixing the headers in the core
 221          * virtio-net code.
 222          */
 223         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
 224                                                             queues, true);
 225     } else if (virtio_net_started(n, vdev->status)) {
 226         /* After using the device, we need to reset the network backend to
 227          * the default (guest native endianness), otherwise the guest may
 228          * lose network connectivity if it is rebooted into a different
 229          * endianness.
 230          */
 231         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
 232     }
 233 }
 234
 235 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
 236 {
 237     unsigned int dropped = virtqueue_drop_all(vq);
 238     if (dropped) {
 239         virtio_notify(vdev, vq);
 240     }
 241 }
 242
 243 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 244 {
 245     VirtIONet *n = VIRTIO_NET(vdev);
 246     VirtIONetQueue *q;
 247     int i;
 248     uint8_t queue_status;
 249
 250     virtio_net_vnet_endian_status(n, status);
 251     virtio_net_vhost_status(n, status);
 252
 253     for (i = 0; i < n->max_queues; i++) {
 254         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
 255         bool queue_started;
 256         q = &n->vqs[i];
 257
 258         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
 259             queue_status = 0;
 260         } else {
 261             queue_status = status;
 262         }
 263         queue_started =
 264             virtio_net_started(n, queue_status) && !n->vhost_started;
 265
 266         if (queue_started) {
 267             qemu_flush_queued_packets(ncs);
 268         }
 269
 270         if (!q->tx_waiting) {
 271             continue;
 272         }
 273
 274         if (queue_started) {
 275             if (q->tx_timer) {
 276                 timer_mod(q->tx_timer,
 277                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
 278             } else {
 279                 qemu_bh_schedule(q->tx_bh);
 280             }
 281         } else {
 282             if (q->tx_timer) {
 283                 timer_del(q->tx_timer);
 284             } else {
 285                 qemu_bh_cancel(q->tx_bh);
 286             }
 287             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
 288                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK)) {
 289                 /* if tx is waiting we are likely have some packets in tx queue
 290                  * and disabled notification */
 291                 q->tx_waiting = 0;
 292                 virtio_queue_set_notification(q->tx_vq, 1);
 293                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
 294             }
 295         }
 296     }
 297 }
 298
 299 static void virtio_net_set_link_status(NetClientState *nc)
 300 {
 301     VirtIONet *n = qemu_get_nic_opaque(nc);
 302     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 303     uint16_t old_status = n->status;
 304
 305     if (nc->link_down)
 306         n->status &= ~VIRTIO_NET_S_LINK_UP;
 307     else
 308         n->status |= VIRTIO_NET_S_LINK_UP;
 309
 310     if (n->status != old_status)
 311         virtio_notify_config(vdev);
 312
 313     virtio_net_set_status(vdev, vdev->status);
 314 }
 315
 316 static void rxfilter_notify(NetClientState *nc)
 317 {
 318     VirtIONet *n = qemu_get_nic_opaque(nc);
 319
 320     if (nc->rxfilter_notify_enabled) {
 321         gchar *path = object_get_canonical_path(OBJECT(n->qdev));
 322         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
 323                                               n->netclient_name, path, &error_abort);
 324         g_free(path);
 325
 326         /* disable event notification to avoid events flooding */
 327         nc->rxfilter_notify_enabled = 0;
 328     }
 329 }
 330
 331 static intList *get_vlan_table(VirtIONet *n)
 332 {
 333     intList *list, *entry;
 334     int i, j;
 335
 336     list = NULL;
 337     for (i = 0; i < MAX_VLAN >> 5; i++) {
 338         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
 339             if (n->vlans[i] & (1U << j)) {
 340                 entry = g_malloc0(sizeof(*entry));
 341                 entry->value = (i << 5) + j;
 342                 entry->next = list;
 343                 list = entry;
 344             }
 345         }
 346     }
 347
 348     return list;
 349 }
 350
 351 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
 352 {
 353     VirtIONet *n = qemu_get_nic_opaque(nc);
 354     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 355     RxFilterInfo *info;
 356     strList *str_list, *entry;
 357     int i;
 358
 359     info = g_malloc0(sizeof(*info));
 360     info->name = g_strdup(nc->name);
 361     info->promiscuous = n->promisc;
 362
 363     if (n->nouni) {
 364         info->unicast = RX_STATE_NONE;
 365     } else if (n->alluni) {
 366         info->unicast = RX_STATE_ALL;
 367     } else {
 368         info->unicast = RX_STATE_NORMAL;
 369     }
 370
 371     if (n->nomulti) {
 372         info->multicast = RX_STATE_NONE;
 373     } else if (n->allmulti) {
 374         info->multicast = RX_STATE_ALL;
 375     } else {
 376         info->multicast = RX_STATE_NORMAL;
 377     }
 378
 379     info->broadcast_allowed = n->nobcast;
 380     info->multicast_overflow = n->mac_table.multi_overflow;
 381     info->unicast_overflow = n->mac_table.uni_overflow;
 382
 383     info->main_mac = qemu_mac_strdup_printf(n->mac);
 384
 385     str_list = NULL;
 386     for (i = 0; i < n->mac_table.first_multi; i++) {
 387         entry = g_malloc0(sizeof(*entry));
 388         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
 389         entry->next = str_list;
 390         str_list = entry;
 391     }
 392     info->unicast_table = str_list;
 393
 394     str_list = NULL;
 395     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
 396         entry = g_malloc0(sizeof(*entry));
 397         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
 398         entry->next = str_list;
 399         str_list = entry;
 400     }
 401     info->multicast_table = str_list;
 402     info->vlan_table = get_vlan_table(n);
 403
 404     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
 405         info->vlan = RX_STATE_ALL;
 406     } else if (!info->vlan_table) {
 407         info->vlan = RX_STATE_NONE;
 408     } else {
 409         info->vlan = RX_STATE_NORMAL;
 410     }
 411
 412     /* enable event notification after query */
 413     nc->rxfilter_notify_enabled = 1;
 414
 415     return info;
 416 }
 417
 418 static void virtio_net_reset(VirtIODevice *vdev)
 419 {
 420     VirtIONet *n = VIRTIO_NET(vdev);
 421
 422     /* Reset back to compatibility mode */
 423     n->promisc = 1;
 424     n->allmulti = 0;
 425     n->alluni = 0;
 426     n->nomulti = 0;
 427     n->nouni = 0;
 428     n->nobcast = 0;
 429     /* multiqueue is disabled by default */
 430     n->curr_queues = 1;
 431     timer_del(n->announce_timer);
 432     n->announce_counter = 0;
 433     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
 434
 435     /* Flush any MAC and VLAN filter table state */
 436     n->mac_table.in_use = 0;
 437     n->mac_table.first_multi = 0;
 438     n->mac_table.multi_overflow = 0;
 439     n->mac_table.uni_overflow = 0;
 440     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 441     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
 442     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 443     memset(n->vlans, 0, MAX_VLAN >> 3);
 444 }
 445
 446 static void peer_test_vnet_hdr(VirtIONet *n)
 447 {
 448     NetClientState *nc = qemu_get_queue(n->nic);
 449     if (!nc->peer) {
 450         return;
 451     }
 452
 453     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 454 }
 455
 456 static int peer_has_vnet_hdr(VirtIONet *n)
 457 {
 458     return n->has_vnet_hdr;
 459 }
 460
 461 static int peer_has_ufo(VirtIONet *n)
 462 {
 463     if (!peer_has_vnet_hdr(n))
 464         return 0;
 465
 466     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 467
 468     return n->has_ufo;
 469 }
 470
 471 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
 472                                        int version_1)
 473 {
 474     int i;
 475     NetClientState *nc;
 476
 477     n->mergeable_rx_bufs = mergeable_rx_bufs;
 478
 479     if (version_1) {
 480         n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 481     } else {
 482         n->guest_hdr_len = n->mergeable_rx_bufs ?
 483             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
 484             sizeof(struct virtio_net_hdr);
 485     }
 486
 487     for (i = 0; i < n->max_queues; i++) {
 488         nc = qemu_get_subqueue(n->nic, i);
 489
 490         if (peer_has_vnet_hdr(n) &&
 491             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
 492             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
 493             n->host_hdr_len = n->guest_hdr_len;
 494         }
 495     }
 496 }
 497
 498 static int peer_attach(VirtIONet *n, int index)
 499 {
 500     NetClientState *nc = qemu_get_subqueue(n->nic, index);
 501
 502     if (!nc->peer) {
 503         return 0;
 504     }
 505
 506     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 507         vhost_set_vring_enable(nc->peer, 1);
 508     }
 509
 510     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
 511         return 0;
 512     }
 513
 514     if (n->max_queues == 1) {
 515         return 0;
 516     }
 517
 518     return tap_enable(nc->peer);
 519 }
 520
 521 static int peer_detach(VirtIONet *n, int index)
 522 {
 523     NetClientState *nc = qemu_get_subqueue(n->nic, index);
 524
 525     if (!nc->peer) {
 526         return 0;
 527     }
 528
 529     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 530         vhost_set_vring_enable(nc->peer, 0);
 531     }
 532
 533     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
 534         return 0;
 535     }
 536
 537     return tap_disable(nc->peer);
 538 }
 539
 540 static void virtio_net_set_queues(VirtIONet *n)
 541 {
 542     int i;
 543     int r;
 544
 545     if (n->nic->peer_deleted) {
 546         return;
 547     }
 548
 549     for (i = 0; i < n->max_queues; i++) {
 550         if (i < n->curr_queues) {
 551             r = peer_attach(n, i);
 552             assert(!r);
 553         } else {
 554             r = peer_detach(n, i);
 555             assert(!r);
 556         }
 557     }
 558 }
 559
/* Forward declaration: needed by virtio_net_set_features() below; the
 * definition is not part of this section of the file. */
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 561
 562 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
 563                                         Error **errp)
 564 {
 565     VirtIONet *n = VIRTIO_NET(vdev);
 566     NetClientState *nc = qemu_get_queue(n->nic);
 567
 568     /* Firstly sync all virtio-net possible supported features */
 569     features |= n->host_features;
 570
 571     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 572
 573     if (!peer_has_vnet_hdr(n)) {
 574         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
 575         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 576         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 577         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
 578
 579         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
 580         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
 581         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
 582         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 583     }
 584
 585     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
 586         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
 587         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
 588     }
 589
 590     if (!get_vhost_net(nc->peer)) {
 591         return features;
 592     }
 593     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
 594     vdev->backend_features = features;
 595
 596     if (n->mtu_bypass_backend &&
 597             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
 598         features |= (1ULL << VIRTIO_NET_F_MTU);
 599     }
 600
 601     return features;
 602 }
 603
 604 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
 605 {
 606     uint64_t features = 0;
 607
 608     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
 609      * but also these: */
 610     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 611     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
 612     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 613     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 614     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
 615
 616     return features;
 617 }
 618
 619 static void virtio_net_apply_guest_offloads(VirtIONet *n)
 620 {
 621     qemu_set_offload(qemu_get_queue(n->nic)->peer,
 622             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
 623             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 624             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 625             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
 626             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
 627 }
 628
 629 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
 630 {
 631     static const uint64_t guest_offloads_mask =
 632         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 633         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 634         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 635         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
 636         (1ULL << VIRTIO_NET_F_GUEST_UFO);
 637
 638     return guest_offloads_mask & features;
 639 }
 640
 641 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
 642 {
 643     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 644     return virtio_net_guest_offloads_by_features(vdev->guest_features);
 645 }
 646
 647 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 648 {
 649     VirtIONet *n = VIRTIO_NET(vdev);
 650     int i;
 651
 652     if (n->mtu_bypass_backend &&
 653             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
 654         features &= ~(1ULL << VIRTIO_NET_F_MTU);
 655     }
 656
 657     virtio_net_set_multiqueue(n,
 658                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
 659
 660     virtio_net_set_mrg_rx_bufs(n,
 661                                virtio_has_feature(features,
 662                                                   VIRTIO_NET_F_MRG_RXBUF),
 663                                virtio_has_feature(features,
 664                                                   VIRTIO_F_VERSION_1));
 665
 666     if (n->has_vnet_hdr) {
 667         n->curr_guest_offloads =
 668             virtio_net_guest_offloads_by_features(features);
 669         virtio_net_apply_guest_offloads(n);
 670     }
 671
 672     for (i = 0;  i < n->max_queues; i++) {
 673         NetClientState *nc = qemu_get_subqueue(n->nic, i);
 674
 675         if (!get_vhost_net(nc->peer)) {
 676             continue;
 677         }
 678         vhost_net_ack_features(get_vhost_net(nc->peer), features);
 679     }
 680
 681     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
 682         memset(n->vlans, 0, MAX_VLAN >> 3);
 683     } else {
 684         memset(n->vlans, 0xff, MAX_VLAN >> 3);
 685     }
 686 }
 687
 688 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
 689                                      struct iovec *iov, unsigned int iov_cnt)
 690 {
 691     uint8_t on;
 692     size_t s;
 693     NetClientState *nc = qemu_get_queue(n->nic);
 694
 695     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
 696     if (s != sizeof(on)) {
 697         return VIRTIO_NET_ERR;
 698     }
 699
 700     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
 701         n->promisc = on;
 702     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
 703         n->allmulti = on;
 704     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
 705         n->alluni = on;
 706     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
 707         n->nomulti = on;
 708     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
 709         n->nouni = on;
 710     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
 711         n->nobcast = on;
 712     } else {
 713         return VIRTIO_NET_ERR;
 714     }
 715
 716     rxfilter_notify(nc);
 717
 718     return VIRTIO_NET_OK;
 719 }
 720
 721 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
 722                                      struct iovec *iov, unsigned int iov_cnt)
 723 {
 724     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 725     uint64_t offloads;
 726     size_t s;
 727
 728     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
 729         return VIRTIO_NET_ERR;
 730     }
 731
 732     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
 733     if (s != sizeof(offloads)) {
 734         return VIRTIO_NET_ERR;
 735     }
 736
 737     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
 738         uint64_t supported_offloads;
 739
 740         if (!n->has_vnet_hdr) {
 741             return VIRTIO_NET_ERR;
 742         }
 743
 744         supported_offloads = virtio_net_supported_guest_offloads(n);
 745         if (offloads & ~supported_offloads) {
 746             return VIRTIO_NET_ERR;
 747         }
 748
 749         n->curr_guest_offloads = offloads;
 750         virtio_net_apply_guest_offloads(n);
 751
 752         return VIRTIO_NET_OK;
 753     } else {
 754         return VIRTIO_NET_ERR;
 755     }
 756 }
 757
 758 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
 759                                  struct iovec *iov, unsigned int iov_cnt)
 760 {
 761     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 762     struct virtio_net_ctrl_mac mac_data;
 763     size_t s;
 764     NetClientState *nc = qemu_get_queue(n->nic);
 765
 766     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
 767         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
 768             return VIRTIO_NET_ERR;
 769         }
 770         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
 771         assert(s == sizeof(n->mac));
 772         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 773         rxfilter_notify(nc);
 774
 775         return VIRTIO_NET_OK;
 776     }
 777
 778     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
 779         return VIRTIO_NET_ERR;
 780     }
 781
 782     int in_use = 0;
 783     int first_multi = 0;
 784     uint8_t uni_overflow = 0;
 785     uint8_t multi_overflow = 0;
 786     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
 787
 788     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
 789                    sizeof(mac_data.entries));
 790     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
 791     if (s != sizeof(mac_data.entries)) {
 792         goto error;
 793     }
 794     iov_discard_front(&iov, &iov_cnt, s);
 795
 796     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
 797         goto error;
 798     }
 799
 800     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
 801         s = iov_to_buf(iov, iov_cnt, 0, macs,
 802                        mac_data.entries * ETH_ALEN);
 803         if (s != mac_data.entries * ETH_ALEN) {
 804             goto error;
 805         }
 806         in_use += mac_data.entries;
 807     } else {
 808         uni_overflow = 1;
 809     }
 810
 811     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
 812
 813     first_multi = in_use;
 814
 815     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
 816                    sizeof(mac_data.entries));
 817     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
 818     if (s != sizeof(mac_data.entries)) {
 819         goto error;
 820     }
 821
 822     iov_discard_front(&iov, &iov_cnt, s);
 823
 824     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
 825         goto error;
 826     }
 827
 828     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
 829         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
 830                        mac_data.entries * ETH_ALEN);
 831         if (s != mac_data.entries * ETH_ALEN) {
 832             goto error;
 833         }
 834         in_use += mac_data.entries;
 835     } else {
 836         multi_overflow = 1;
 837     }
 838
 839     n->mac_table.in_use = in_use;
 840     n->mac_table.first_multi = first_multi;
 841     n->mac_table.uni_overflow = uni_overflow;
 842     n->mac_table.multi_overflow = multi_overflow;
 843     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
 844     g_free(macs);
 845     rxfilter_notify(nc);
 846
 847     return VIRTIO_NET_OK;
 848
 849 error:
 850     g_free(macs);
 851     return VIRTIO_NET_ERR;
 852 }
 853
 854 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
 855                                         struct iovec *iov, unsigned int iov_cnt)
 856 {
 857     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 858     uint16_t vid;
 859     size_t s;
 860     NetClientState *nc = qemu_get_queue(n->nic);
 861
 862     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
 863     vid = virtio_lduw_p(vdev, &vid);
 864     if (s != sizeof(vid)) {
 865         return VIRTIO_NET_ERR;
 866     }
 867
 868     if (vid >= MAX_VLAN)
 869         return VIRTIO_NET_ERR;
 870
 871     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
 872         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
 873     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
 874         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
 875     else
 876         return VIRTIO_NET_ERR;
 877
 878     rxfilter_notify(nc);
 879
 880     return VIRTIO_NET_OK;
 881 }
 882
 883 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
 884                                       struct iovec *iov, unsigned int iov_cnt)
 885 {
 886     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
 887         n->status & VIRTIO_NET_S_ANNOUNCE) {
 888         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
 889         if (n->announce_counter) {
 890             timer_mod(n->announce_timer,
 891                       qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
 892                       self_announce_delay(n->announce_counter));
 893         }
 894         return VIRTIO_NET_OK;
 895     } else {
 896         return VIRTIO_NET_ERR;
 897     }
 898 }
 899
 900 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
 901                                 struct iovec *iov, unsigned int iov_cnt)
 902 {
 903     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 904     struct virtio_net_ctrl_mq mq;
 905     size_t s;
 906     uint16_t queues;
 907
 908     s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
 909     if (s != sizeof(mq)) {
 910         return VIRTIO_NET_ERR;
 911     }
 912
 913     if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
 914         return VIRTIO_NET_ERR;
 915     }
 916
 917     queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
 918
 919     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
 920         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
 921         queues > n->max_queues ||
 922         !n->multiqueue) {
 923         return VIRTIO_NET_ERR;
 924     }
 925
 926     n->curr_queues = queues;
 927     /* stop the backend before changing the number of queues to avoid handling a
 928      * disabled queue */
 929     virtio_net_set_status(vdev, vdev->status);
 930     virtio_net_set_queues(n);
 931
 932     return VIRTIO_NET_OK;
 933 }
 934
 935 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 936 {
 937     VirtIONet *n = VIRTIO_NET(vdev);
 938     struct virtio_net_ctrl_hdr ctrl;
 939     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
 940     VirtQueueElement *elem;
 941     size_t s;
 942     struct iovec *iov, *iov2;
 943     unsigned int iov_cnt;
 944
 945     for (;;) {
 946         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 947         if (!elem) {
 948             break;
 949         }
 950         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
 951             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
 952             virtio_error(vdev, "virtio-net ctrl missing headers");
 953             virtqueue_detach_element(vq, elem, 0);
 954             g_free(elem);
 955             break;
 956         }
 957
 958         iov_cnt = elem->out_num;
 959         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
 960         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
 961         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
 962         if (s != sizeof(ctrl)) {
 963             status = VIRTIO_NET_ERR;
 964         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
 965             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
 966         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
 967             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
 968         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
 969             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
 970         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
 971             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
 972         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
 973             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
 974         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
 975             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
 976         }
 977
 978         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
 979         assert(s == sizeof(status));
 980
 981         virtqueue_push(vq, elem, sizeof(status));
 982         virtio_notify(vdev, vq);
 983         g_free(iov2);
 984         g_free(elem);
 985     }
 986 }
 987
 988 /* RX */
 989
 990 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
 991 {
 992     VirtIONet *n = VIRTIO_NET(vdev);
 993     int queue_index = vq2q(virtio_get_queue_index(vq));
 994
 995     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
 996 }
 997
 998 static int virtio_net_can_receive(NetClientState *nc)
 999 {
1000     VirtIONet *n = qemu_get_nic_opaque(nc);
1001     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1002     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1003
1004     if (!vdev->vm_running) {
1005         return 0;
1006     }
1007
1008     if (nc->queue_index >= n->curr_queues) {
1009         return 0;
1010     }
1011
1012     if (!virtio_queue_ready(q->rx_vq) ||
1013         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1014         return 0;
1015     }
1016
1017     return 1;
1018 }
1019
1020 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1021 {
1022     VirtIONet *n = q->n;
1023     if (virtio_queue_empty(q->rx_vq) ||
1024         (n->mergeable_rx_bufs &&
1025          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1026         virtio_queue_set_notification(q->rx_vq, 1);
1027
1028         /* To avoid a race condition where the guest has made some buffers
1029          * available after the above check but before notification was
1030          * enabled, check for available buffers again.
1031          */
1032         if (virtio_queue_empty(q->rx_vq) ||
1033             (n->mergeable_rx_bufs &&
1034              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1035             return 0;
1036         }
1037     }
1038
1039     virtio_queue_set_notification(q->rx_vq, 0);
1040     return 1;
1041 }
1042
1043 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1044 {
1045     virtio_tswap16s(vdev, &hdr->hdr_len);
1046     virtio_tswap16s(vdev, &hdr->gso_size);
1047     virtio_tswap16s(vdev, &hdr->csum_start);
1048     virtio_tswap16s(vdev, &hdr->csum_offset);
1049 }
1050
1051 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1052  * it never finds out that the packets don't have valid checksums.  This
1053  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1054  * fix this with Xen but it hasn't appeared in an upstream release of
1055  * dhclient yet.
1056  *
1057  * To avoid breaking existing guests, we catch udp packets and add
1058  * checksums.  This is terrible but it's better than hacking the guest
1059  * kernels.
1060  *
1061  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1062  * we should provide a mechanism to disable it to avoid polluting the host
1063  * cache.
1064  */
1065 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1066                                         uint8_t *buf, size_t size)
1067 {
1068     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1069         (size > 27 && size < 1500) && /* normal sized MTU */
1070         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1071         (buf[23] == 17) && /* ip.protocol == UDP */
1072         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1073         net_checksum_calculate(buf, size);
1074         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1075     }
1076 }
1077
1078 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1079                            const void *buf, size_t size)
1080 {
1081     if (n->has_vnet_hdr) {
1082         /* FIXME this cast is evil */
1083         void *wbuf = (void *)buf;
1084         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1085                                     size - n->host_hdr_len);
1086
1087         if (n->needs_vnet_hdr_swap) {
1088             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1089         }
1090         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1091     } else {
1092         struct virtio_net_hdr hdr = {
1093             .flags = 0,
1094             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1095         };
1096         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1097     }
1098 }
1099
1100 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1101 {
1102     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1103     static const uint8_t vlan[] = {0x81, 0x00};
1104     uint8_t *ptr = (uint8_t *)buf;
1105     int i;
1106
1107     if (n->promisc)
1108         return 1;
1109
1110     ptr += n->host_hdr_len;
1111
1112     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1113         int vid = lduw_be_p(ptr + 14) & 0xfff;
1114         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1115             return 0;
1116     }
1117
1118     if (ptr[0] & 1) { // multicast
1119         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1120             return !n->nobcast;
1121         } else if (n->nomulti) {
1122             return 0;
1123         } else if (n->allmulti || n->mac_table.multi_overflow) {
1124             return 1;
1125         }
1126
1127         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1128             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1129                 return 1;
1130             }
1131         }
1132     } else { // unicast
1133         if (n->nouni) {
1134             return 0;
1135         } else if (n->alluni || n->mac_table.uni_overflow) {
1136             return 1;
1137         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1138             return 1;
1139         }
1140
1141         for (i = 0; i < n->mac_table.first_multi; i++) {
1142             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1143                 return 1;
1144             }
1145         }
1146     }
1147
1148     return 0;
1149 }
1150
1151 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1152                                       size_t size)
1153 {
1154     VirtIONet *n = qemu_get_nic_opaque(nc);
1155     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1156     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1157     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1158     struct virtio_net_hdr_mrg_rxbuf mhdr;
1159     unsigned mhdr_cnt = 0;
1160     size_t offset, i, guest_offset;
1161
1162     if (!virtio_net_can_receive(nc)) {
1163         return -1;
1164     }
1165
1166     /* hdr_len refers to the header we supply to the guest */
1167     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1168         return 0;
1169     }
1170
1171     if (!receive_filter(n, buf, size))
1172         return size;
1173
1174     offset = i = 0;
1175
1176     while (offset < size) {
1177         VirtQueueElement *elem;
1178         int len, total;
1179         const struct iovec *sg;
1180
1181         total = 0;
1182
1183         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1184         if (!elem) {
1185             if (i) {
1186                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1187                              "i %zd mergeable %d offset %zd, size %zd, "
1188                              "guest hdr len %zd, host hdr len %zd "
1189                              "guest features 0x%" PRIx64,
1190                              i, n->mergeable_rx_bufs, offset, size,
1191                              n->guest_hdr_len, n->host_hdr_len,
1192                              vdev->guest_features);
1193             }
1194             return -1;
1195         }
1196
1197         if (elem->in_num < 1) {
1198             virtio_error(vdev,
1199                          "virtio-net receive queue contains no in buffers");
1200             virtqueue_detach_element(q->rx_vq, elem, 0);
1201             g_free(elem);
1202             return -1;
1203         }
1204
1205         sg = elem->in_sg;
1206         if (i == 0) {
1207             assert(offset == 0);
1208             if (n->mergeable_rx_bufs) {
1209                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1210                                     sg, elem->in_num,
1211                                     offsetof(typeof(mhdr), num_buffers),
1212                                     sizeof(mhdr.num_buffers));
1213             }
1214
1215             receive_header(n, sg, elem->in_num, buf, size);
1216             offset = n->host_hdr_len;
1217             total += n->guest_hdr_len;
1218             guest_offset = n->guest_hdr_len;
1219         } else {
1220             guest_offset = 0;
1221         }
1222
1223         /* copy in packet.  ugh */
1224         len = iov_from_buf(sg, elem->in_num, guest_offset,
1225                            buf + offset, size - offset);
1226         total += len;
1227         offset += len;
1228         /* If buffers can't be merged, at this point we
1229          * must have consumed the complete packet.
1230          * Otherwise, drop it. */
1231         if (!n->mergeable_rx_bufs && offset < size) {
1232             virtqueue_unpop(q->rx_vq, elem, total);
1233             g_free(elem);
1234             return size;
1235         }
1236
1237         /* signal other side */
1238         virtqueue_fill(q->rx_vq, elem, total, i++);
1239         g_free(elem);
1240     }
1241
1242     if (mhdr_cnt) {
1243         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1244         iov_from_buf(mhdr_sg, mhdr_cnt,
1245                      0,
1246                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1247     }
1248
1249     virtqueue_flush(q->rx_vq, i);
1250     virtio_notify(vdev, q->rx_vq);
1251
1252     return size;
1253 }
1254
1255 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
1256                                   size_t size)
1257 {
1258     ssize_t r;
1259
1260     rcu_read_lock();
1261     r = virtio_net_receive_rcu(nc, buf, size);
1262     rcu_read_unlock();
1263     return r;
1264 }
1265
1266 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
1267
1268 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
1269 {
1270     VirtIONet *n = qemu_get_nic_opaque(nc);
1271     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1272     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1273
1274     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
1275     virtio_notify(vdev, q->tx_vq);
1276
1277     g_free(q->async_tx.elem);
1278     q->async_tx.elem = NULL;
1279
1280     virtio_queue_set_notification(q->tx_vq, 1);
1281     virtio_net_flush_tx(q);
1282 }
1283
1284 /* TX */
1285 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
1286 {
1287     VirtIONet *n = q->n;
1288     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1289     VirtQueueElement *elem;
1290     int32_t num_packets = 0;
1291     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
1292     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1293         return num_packets;
1294     }
1295
1296     if (q->async_tx.elem) {
1297         virtio_queue_set_notification(q->tx_vq, 0);
1298         return num_packets;
1299     }
1300
1301     for (;;) {
1302         ssize_t ret;
1303         unsigned int out_num;
1304         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
1305         struct virtio_net_hdr_mrg_rxbuf mhdr;
1306
1307         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
1308         if (!elem) {
1309             break;
1310         }
1311
1312         out_num = elem->out_num;
1313         out_sg = elem->out_sg;
1314         if (out_num < 1) {
1315             virtio_error(vdev, "virtio-net header not in first element");
1316             virtqueue_detach_element(q->tx_vq, elem, 0);
1317             g_free(elem);
1318             return -EINVAL;
1319         }
1320
1321         if (n->has_vnet_hdr) {
1322             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
1323                 n->guest_hdr_len) {
1324                 virtio_error(vdev, "virtio-net header incorrect");
1325                 virtqueue_detach_element(q->tx_vq, elem, 0);
1326                 g_free(elem);
1327                 return -EINVAL;
1328             }
1329             if (n->needs_vnet_hdr_swap) {
1330                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
1331                 sg2[0].iov_base = &mhdr;
1332                 sg2[0].iov_len = n->guest_hdr_len;
1333                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
1334                                    out_sg, out_num,
1335                                    n->guest_hdr_len, -1);
1336                 if (out_num == VIRTQUEUE_MAX_SIZE) {
1337                     goto drop;
1338                 }
1339                 out_num += 1;
1340                 out_sg = sg2;
1341             }
1342         }
1343         /*
1344          * If host wants to see the guest header as is, we can
1345          * pass it on unchanged. Otherwise, copy just the parts
1346          * that host is interested in.
1347          */
1348         assert(n->host_hdr_len <= n->guest_hdr_len);
1349         if (n->host_hdr_len != n->guest_hdr_len) {
1350             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
1351                                        out_sg, out_num,
1352                                        0, n->host_hdr_len);
1353             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
1354                              out_sg, out_num,
1355                              n->guest_hdr_len, -1);
1356             out_num = sg_num;
1357             out_sg = sg;
1358         }
1359
1360         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
1361                                       out_sg, out_num, virtio_net_tx_complete);
1362         if (ret == 0) {
1363             virtio_queue_set_notification(q->tx_vq, 0);
1364             q->async_tx.elem = elem;
1365             return -EBUSY;
1366         }
1367
1368 drop:
1369         virtqueue_push(q->tx_vq, elem, 0);
1370         virtio_notify(vdev, q->tx_vq);
1371         g_free(elem);
1372
1373         if (++num_packets >= n->tx_burst) {
1374             break;
1375         }
1376     }
1377     return num_packets;
1378 }
1379
1380 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
1381 {
1382     VirtIONet *n = VIRTIO_NET(vdev);
1383     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
1384
1385     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
1386         virtio_net_drop_tx_queue_data(vdev, vq);
1387         return;
1388     }
1389
1390     /* This happens when device was stopped but VCPU wasn't. */
1391     if (!vdev->vm_running) {
1392         q->tx_waiting = 1;
1393         return;
1394     }
1395
1396     if (q->tx_waiting) {
1397         virtio_queue_set_notification(vq, 1);
1398         timer_del(q->tx_timer);
1399         q->tx_waiting = 0;
1400         if (virtio_net_flush_tx(q) == -EINVAL) {
1401             return;
1402         }
1403     } else {
1404         timer_mod(q->tx_timer,
1405                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
1406         q->tx_waiting = 1;
1407         virtio_queue_set_notification(vq, 0);
1408     }
1409 }
1410
1411 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
1412 {
1413     VirtIONet *n = VIRTIO_NET(vdev);
1414     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
1415
1416     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
1417         virtio_net_drop_tx_queue_data(vdev, vq);
1418         return;
1419     }
1420
1421     if (unlikely(q->tx_waiting)) {
1422         return;
1423     }
1424     q->tx_waiting = 1;
1425     /* This happens when device was stopped but VCPU wasn't. */
1426     if (!vdev->vm_running) {
1427         return;
1428     }
1429     virtio_queue_set_notification(vq, 0);
1430     qemu_bh_schedule(q->tx_bh);
1431 }
1432
1433 static void virtio_net_tx_timer(void *opaque)
1434 {
1435     VirtIONetQueue *q = opaque;
1436     VirtIONet *n = q->n;
1437     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1438     /* This happens when device was stopped but BH wasn't. */
1439     if (!vdev->vm_running) {
1440         /* Make sure tx waiting is set, so we'll run when restarted. */
1441         assert(q->tx_waiting);
1442         return;
1443     }
1444
1445     q->tx_waiting = 0;
1446
1447     /* Just in case the driver is not ready on more */
1448     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1449         return;
1450     }
1451
1452     virtio_queue_set_notification(q->tx_vq, 1);
1453     virtio_net_flush_tx(q);
1454 }
1455
1456 static void virtio_net_tx_bh(void *opaque)
1457 {
1458     VirtIONetQueue *q = opaque;
1459     VirtIONet *n = q->n;
1460     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1461     int32_t ret;
1462
1463     /* This happens when device was stopped but BH wasn't. */
1464     if (!vdev->vm_running) {
1465         /* Make sure tx waiting is set, so we'll run when restarted. */
1466         assert(q->tx_waiting);
1467         return;
1468     }
1469
1470     q->tx_waiting = 0;
1471
1472     /* Just in case the driver is not ready on more */
1473     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
1474         return;
1475     }
1476
1477     ret = virtio_net_flush_tx(q);
1478     if (ret == -EBUSY || ret == -EINVAL) {
1479         return; /* Notification re-enable handled by tx_complete or device
1480                  * broken */
1481     }
1482
1483     /* If we flush a full burst of packets, assume there are
1484      * more coming and immediately reschedule */
1485     if (ret >= n->tx_burst) {
1486         qemu_bh_schedule(q->tx_bh);
1487         q->tx_waiting = 1;
1488         return;
1489     }
1490
1491     /* If less than a full burst, re-enable notification and flush
1492      * anything that may have come in while we weren't looking.  If
1493      * we find something, assume the guest is still active and reschedule */
1494     virtio_queue_set_notification(q->tx_vq, 1);
1495     ret = virtio_net_flush_tx(q);
1496     if (ret == -EINVAL) {
1497         return;
1498     } else if (ret > 0) {
1499         virtio_queue_set_notification(q->tx_vq, 0);
1500         qemu_bh_schedule(q->tx_bh);
1501         q->tx_waiting = 1;
1502     }
1503 }
1504
1505 static void virtio_net_add_queue(VirtIONet *n, int index)
1506 {
1507     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1508
1509     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
1510                                            virtio_net_handle_rx);
1511     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
1512         n->vqs[index].tx_vq =
1513             virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
1514         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
1515                                               virtio_net_tx_timer,
1516                                               &n->vqs[index]);
1517     } else {
1518         n->vqs[index].tx_vq =
1519             virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
1520         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
1521     }
1522
1523     n->vqs[index].tx_waiting = 0;
1524     n->vqs[index].n = n;
1525 }
1526
1527 static void virtio_net_del_queue(VirtIONet *n, int index)
1528 {
1529     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1530     VirtIONetQueue *q = &n->vqs[index];
1531     NetClientState *nc = qemu_get_subqueue(n->nic, index);
1532
1533     qemu_purge_queued_packets(nc);
1534
1535     virtio_del_queue(vdev, index * 2);
1536     if (q->tx_timer) {
1537         timer_del(q->tx_timer);
1538         timer_free(q->tx_timer);
1539         q->tx_timer = NULL;
1540     } else {
1541         qemu_bh_delete(q->tx_bh);
1542         q->tx_bh = NULL;
1543     }
1544     q->tx_waiting = 0;
1545     virtio_del_queue(vdev, index * 2 + 1);
1546 }
1547
1548 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
1549 {
1550     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1551     int old_num_queues = virtio_get_num_queues(vdev);
1552     int new_num_queues = new_max_queues * 2 + 1;
1553     int i;
1554
1555     assert(old_num_queues >= 3);
1556     assert(old_num_queues % 2 == 1);
1557
1558     if (old_num_queues == new_num_queues) {
1559         return;
1560     }
1561
1562     /*
1563      * We always need to remove and add ctrl vq if
1564      * old_num_queues != new_num_queues. Remove ctrl_vq first,
1565      * and then we only enter one of the following too loops.
1566      */
1567     virtio_del_queue(vdev, old_num_queues - 1);
1568
1569     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
1570         /* new_num_queues < old_num_queues */
1571         virtio_net_del_queue(n, i / 2);
1572     }
1573
1574     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
1575         /* new_num_queues > old_num_queues */
1576         virtio_net_add_queue(n, i / 2);
1577     }
1578
1579     /* add ctrl_vq last */
1580     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
1581 }
1582
1583 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
1584 {
1585     int max = multiqueue ? n->max_queues : 1;
1586
1587     n->multiqueue = multiqueue;
1588     virtio_net_change_num_queues(n, max);
1589
1590     virtio_net_set_queues(n);
1591 }
1592
1593 static int virtio_net_post_load_device(void *opaque, int version_id)
1594 {
1595     VirtIONet *n = opaque;
1596     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1597     int i, link_down;
1598
1599     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
1600                                virtio_vdev_has_feature(vdev,
1601                                                        VIRTIO_F_VERSION_1));
1602
1603     /* MAC_TABLE_ENTRIES may be different from the saved image */
1604     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
1605         n->mac_table.in_use = 0;
1606     }
1607
1608     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1609         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
1610     }
1611
1612     if (peer_has_vnet_hdr(n)) {
1613         virtio_net_apply_guest_offloads(n);
1614     }
1615
1616     virtio_net_set_queues(n);
1617
1618     /* Find the first multicast entry in the saved MAC filter */
1619     for (i = 0; i < n->mac_table.in_use; i++) {
1620         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
1621             break;
1622         }
1623     }
1624     n->mac_table.first_multi = i;
1625
1626     /* nc.link_down can't be migrated, so infer link_down according
1627      * to link status bit in n->status */
1628     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
1629     for (i = 0; i < n->max_queues; i++) {
1630         qemu_get_subqueue(n->nic, i)->link_down = link_down;
1631     }
1632
1633     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
1634         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
1635         n->announce_counter = SELF_ANNOUNCE_ROUNDS;
1636         timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
1637     }
1638
1639     return 0;
1640 }
1641
1642 /* tx_waiting field of a VirtIONetQueue */
1643 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
1644     .name = "virtio-net-queue-tx_waiting",
1645     .fields = (VMStateField[]) {
1646         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
1647         VMSTATE_END_OF_LIST()
1648    },
1649 };
1650
1651 static bool max_queues_gt_1(void *opaque, int version_id)
1652 {
1653     return VIRTIO_NET(opaque)->max_queues > 1;
1654 }
1655
1656 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
1657 {
1658     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
1659                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
1660 }
1661
1662 static bool mac_table_fits(void *opaque, int version_id)
1663 {
1664     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
1665 }
1666
/* VMState field test: logical negation of mac_table_fits(). */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    if (mac_table_fits(opaque, version_id)) {
        return false;
    }
    return true;
}
1671
1672 /* This temporary type is shared by all the WITH_TMP methods
1673  * although only some fields are used by each.
1674  */
1675 struct VirtIONetMigTmp {
1676     VirtIONet      *parent;
1677     VirtIONetQueue *vqs_1;
1678     uint16_t        curr_queues_1;
1679     uint8_t         has_ufo;
1680     uint32_t        has_vnet_hdr;
1681 };
1682
1683 /* The 2nd and subsequent tx_waiting flags are loaded later than
1684  * the 1st entry in the queues and only if there's more than one
1685  * entry.  We use the tmp mechanism to calculate a temporary
1686  * pointer and count and also validate the count.
1687  */
1688
1689 static void virtio_net_tx_waiting_pre_save(void *opaque)
1690 {
1691     struct VirtIONetMigTmp *tmp = opaque;
1692
1693     tmp->vqs_1 = tmp->parent->vqs + 1;
1694     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
1695     if (tmp->parent->curr_queues == 0) {
1696         tmp->curr_queues_1 = 0;
1697     }
1698 }
1699
1700 static int virtio_net_tx_waiting_pre_load(void *opaque)
1701 {
1702     struct VirtIONetMigTmp *tmp = opaque;
1703
1704     /* Reuse the pointer setup from save */
1705     virtio_net_tx_waiting_pre_save(opaque);
1706
1707     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
1708         error_report("virtio-net: curr_queues %x > max_queues %x",
1709             tmp->parent->curr_queues, tmp->parent->max_queues);
1710
1711         return -EINVAL;
1712     }
1713
1714     return 0; /* all good */
1715 }
1716
1717 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
1718     .name      = "virtio-net-tx_waiting",
1719     .pre_load  = virtio_net_tx_waiting_pre_load,
1720     .pre_save  = virtio_net_tx_waiting_pre_save,
1721     .fields    = (VMStateField[]) {
1722         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
1723                                      curr_queues_1,
1724                                      vmstate_virtio_net_queue_tx_waiting,
1725                                      struct VirtIONetQueue),
1726         VMSTATE_END_OF_LIST()
1727     },
1728 };
1729
1730 /* the 'has_ufo' flag is just tested; if the incoming stream has the
1731  * flag set we need to check that we have it
1732  */
1733 static int virtio_net_ufo_post_load(void *opaque, int version_id)
1734 {
1735     struct VirtIONetMigTmp *tmp = opaque;
1736
1737     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
1738         error_report("virtio-net: saved image requires TUN_F_UFO support");
1739         return -EINVAL;
1740     }
1741
1742     return 0;
1743 }
1744
1745 static void virtio_net_ufo_pre_save(void *opaque)
1746 {
1747     struct VirtIONetMigTmp *tmp = opaque;
1748
1749     tmp->has_ufo = tmp->parent->has_ufo;
1750 }
1751
1752 static const VMStateDescription vmstate_virtio_net_has_ufo = {
1753     .name      = "virtio-net-ufo",
1754     .post_load = virtio_net_ufo_post_load,
1755     .pre_save  = virtio_net_ufo_pre_save,
1756     .fields    = (VMStateField[]) {
1757         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
1758         VMSTATE_END_OF_LIST()
1759     },
1760 };
1761
1762 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
1763  * flag set we need to check that we have it
1764  */
1765 static int virtio_net_vnet_post_load(void *opaque, int version_id)
1766 {
1767     struct VirtIONetMigTmp *tmp = opaque;
1768
1769     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
1770         error_report("virtio-net: saved image requires vnet_hdr=on");
1771         return -EINVAL;
1772     }
1773
1774     return 0;
1775 }
1776
1777 static void virtio_net_vnet_pre_save(void *opaque)
1778 {
1779     struct VirtIONetMigTmp *tmp = opaque;
1780
1781     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
1782 }
1783
1784 static const VMStateDescription vmstate_virtio_net_has_vnet = {
1785     .name      = "virtio-net-vnet",
1786     .post_load = virtio_net_vnet_post_load,
1787     .pre_save  = virtio_net_vnet_pre_save,
1788     .fields    = (VMStateField[]) {
1789         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
1790         VMSTATE_END_OF_LIST()
1791     },
1792 };
1793
1794 static const VMStateDescription vmstate_virtio_net_device = {
1795     .name = "virtio-net-device",
1796     .version_id = VIRTIO_NET_VM_VERSION,
1797     .minimum_version_id = VIRTIO_NET_VM_VERSION,
1798     .post_load = virtio_net_post_load_device,
1799     .fields = (VMStateField[]) {
1800         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
1801         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
1802                                vmstate_virtio_net_queue_tx_waiting,
1803                                VirtIONetQueue),
1804         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
1805         VMSTATE_UINT16(status, VirtIONet),
1806         VMSTATE_UINT8(promisc, VirtIONet),
1807         VMSTATE_UINT8(allmulti, VirtIONet),
1808         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
1809
1810         /* Guarded pair: If it fits we load it, else we throw it away
1811          * - can happen if source has a larger MAC table.; post-load
1812          *  sets flags in this case.
1813          */
1814         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
1815                                 0, mac_table_fits, mac_table.in_use,
1816                                  ETH_ALEN),
1817         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
1818                                      mac_table.in_use, ETH_ALEN),
1819
1820         /* Note: This is an array of uint32's that's always been saved as a
1821          * buffer; hold onto your endiannesses; it's actually used as a bitmap
1822          * but based on the uint.
1823          */
1824         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
1825         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
1826                          vmstate_virtio_net_has_vnet),
1827         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
1828         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
1829         VMSTATE_UINT8(alluni, VirtIONet),
1830         VMSTATE_UINT8(nomulti, VirtIONet),
1831         VMSTATE_UINT8(nouni, VirtIONet),
1832         VMSTATE_UINT8(nobcast, VirtIONet),
1833         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
1834                          vmstate_virtio_net_has_ufo),
1835         VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
1836                             vmstate_info_uint16_equal, uint16_t),
1837         VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
1838         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
1839                          vmstate_virtio_net_tx_waiting),
1840         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
1841                             has_ctrl_guest_offloads),
1842         VMSTATE_END_OF_LIST()
1843    },
1844 };
1845
1846 static NetClientInfo net_virtio_info = {
1847     .type = NET_CLIENT_DRIVER_NIC,
1848     .size = sizeof(NICState),
1849     .can_receive = virtio_net_can_receive,
1850     .receive = virtio_net_receive,
1851     .link_status_changed = virtio_net_set_link_status,
1852     .query_rx_filter = virtio_net_query_rxfilter,
1853 };
1854
1855 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
1856 {
1857     VirtIONet *n = VIRTIO_NET(vdev);
1858     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
1859     assert(n->vhost_started);
1860     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
1861 }
1862
1863 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
1864                                            bool mask)
1865 {
1866     VirtIONet *n = VIRTIO_NET(vdev);
1867     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
1868     assert(n->vhost_started);
1869     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
1870                              vdev, idx, mask);
1871 }
1872
1873 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
1874 {
1875     int i, config_size = 0;
1876     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
1877
1878     for (i = 0; feature_sizes[i].flags != 0; i++) {
1879         if (host_features & feature_sizes[i].flags) {
1880             config_size = MAX(feature_sizes[i].end, config_size);
1881         }
1882     }
1883     n->config_size = config_size;
1884 }
1885
1886 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
1887                                    const char *type)
1888 {
1889     /*
1890      * The name can be NULL, the netclient name will be type.x.
1891      */
1892     assert(type != NULL);
1893
1894     g_free(n->netclient_name);
1895     g_free(n->netclient_type);
1896     n->netclient_name = g_strdup(name);
1897     n->netclient_type = g_strdup(type);
1898 }
1899
1900 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
1901 {
1902     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1903     VirtIONet *n = VIRTIO_NET(dev);
1904     NetClientState *nc;
1905     int i;
1906
1907     if (n->net_conf.mtu) {
1908         n->host_features |= (0x1 << VIRTIO_NET_F_MTU);
1909     }
1910
1911     virtio_net_set_config_size(n, n->host_features);
1912     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
1913
1914     /*
1915      * We set a lower limit on RX queue size to what it always was.
1916      * Guests that want a smaller ring can always resize it without
1917      * help from us (using virtio 1 and up).
1918      */
1919     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
1920         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
1921         (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) {
1922         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
1923                    "must be a power of 2 between %d and %d.",
1924                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
1925                    VIRTQUEUE_MAX_SIZE);
1926         virtio_cleanup(vdev);
1927         return;
1928     }
1929
1930     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
1931     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
1932         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
1933                    "must be a positive integer less than %d.",
1934                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
1935         virtio_cleanup(vdev);
1936         return;
1937     }
1938     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
1939     n->curr_queues = 1;
1940     n->tx_timeout = n->net_conf.txtimer;
1941
1942     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
1943                        && strcmp(n->net_conf.tx, "bh")) {
1944         error_report("virtio-net: "
1945                      "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
1946                      n->net_conf.tx);
1947         error_report("Defaulting to \"bh\"");
1948     }
1949
1950     for (i = 0; i < n->max_queues; i++) {
1951         virtio_net_add_queue(n, i);
1952     }
1953
1954     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
1955     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
1956     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
1957     n->status = VIRTIO_NET_S_LINK_UP;
1958     n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1959                                      virtio_net_announce_timer, n);
1960
1961     if (n->netclient_type) {
1962         /*
1963          * Happen when virtio_net_set_netclient_name has been called.
1964          */
1965         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
1966                               n->netclient_type, n->netclient_name, n);
1967     } else {
1968         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
1969                               object_get_typename(OBJECT(dev)), dev->id, n);
1970     }
1971
1972     peer_test_vnet_hdr(n);
1973     if (peer_has_vnet_hdr(n)) {
1974         for (i = 0; i < n->max_queues; i++) {
1975             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
1976         }
1977         n->host_hdr_len = sizeof(struct virtio_net_hdr);
1978     } else {
1979         n->host_hdr_len = 0;
1980     }
1981
1982     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
1983
1984     n->vqs[0].tx_waiting = 0;
1985     n->tx_burst = n->net_conf.txburst;
1986     virtio_net_set_mrg_rx_bufs(n, 0, 0);
1987     n->promisc = 1; /* for compatibility */
1988
1989     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1990
1991     n->vlans = g_malloc0(MAX_VLAN >> 3);
1992
1993     nc = qemu_get_queue(n->nic);
1994     nc->rxfilter_notify_enabled = 1;
1995
1996     n->qdev = dev;
1997 }
1998
1999 static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
2000 {
2001     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2002     VirtIONet *n = VIRTIO_NET(dev);
2003     int i, max_queues;
2004
2005     /* This will stop vhost backend if appropriate. */
2006     virtio_net_set_status(vdev, 0);
2007
2008     g_free(n->netclient_name);
2009     n->netclient_name = NULL;
2010     g_free(n->netclient_type);
2011     n->netclient_type = NULL;
2012
2013     g_free(n->mac_table.macs);
2014     g_free(n->vlans);
2015
2016     max_queues = n->multiqueue ? n->max_queues : 1;
2017     for (i = 0; i < max_queues; i++) {
2018         virtio_net_del_queue(n, i);
2019     }
2020
2021     timer_del(n->announce_timer);
2022     timer_free(n->announce_timer);
2023     g_free(n->vqs);
2024     qemu_del_nic(n->nic);
2025     virtio_cleanup(vdev);
2026 }
2027
2028 static void virtio_net_instance_init(Object *obj)
2029 {
2030     VirtIONet *n = VIRTIO_NET(obj);
2031
2032     /*
2033      * The default config_size is sizeof(struct virtio_net_config).
2034      * Can be overriden with virtio_net_set_config_size.
2035      */
2036     n->config_size = sizeof(struct virtio_net_config);
2037     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
2038                                   "bootindex", "/ethernet-phy@0",
2039                                   DEVICE(n), NULL);
2040 }
2041
2042 static void virtio_net_pre_save(void *opaque)
2043 {
2044     VirtIONet *n = opaque;
2045
2046     /* At this point, backend must be stopped, otherwise
2047      * it might keep writing to memory. */
2048     assert(!n->vhost_started);
2049 }
2050
2051 static const VMStateDescription vmstate_virtio_net = {
2052     .name = "virtio-net",
2053     .minimum_version_id = VIRTIO_NET_VM_VERSION,
2054     .version_id = VIRTIO_NET_VM_VERSION,
2055     .fields = (VMStateField[]) {
2056         VMSTATE_VIRTIO_DEVICE,
2057         VMSTATE_END_OF_LIST()
2058     },
2059     .pre_save = virtio_net_pre_save,
2060 };
2061
2062 static Property virtio_net_properties[] = {
2063     DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true),
2064     DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features,
2065                     VIRTIO_NET_F_GUEST_CSUM, true),
2066     DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
2067     DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features,
2068                     VIRTIO_NET_F_GUEST_TSO4, true),
2069     DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features,
2070                     VIRTIO_NET_F_GUEST_TSO6, true),
2071     DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features,
2072                     VIRTIO_NET_F_GUEST_ECN, true),
2073     DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features,
2074                     VIRTIO_NET_F_GUEST_UFO, true),
2075     DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features,
2076                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
2077     DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features,
2078                     VIRTIO_NET_F_HOST_TSO4, true),
2079     DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features,
2080                     VIRTIO_NET_F_HOST_TSO6, true),
2081     DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features,
2082                     VIRTIO_NET_F_HOST_ECN, true),
2083     DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features,
2084                     VIRTIO_NET_F_HOST_UFO, true),
2085     DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features,
2086                     VIRTIO_NET_F_MRG_RXBUF, true),
2087     DEFINE_PROP_BIT("status", VirtIONet, host_features,
2088                     VIRTIO_NET_F_STATUS, true),
2089     DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features,
2090                     VIRTIO_NET_F_CTRL_VQ, true),
2091     DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features,
2092                     VIRTIO_NET_F_CTRL_RX, true),
2093     DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features,
2094                     VIRTIO_NET_F_CTRL_VLAN, true),
2095     DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features,
2096                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
2097     DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features,
2098                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
2099     DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features,
2100                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
2101     DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
2102     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
2103     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
2104                        TX_TIMER_INTERVAL),
2105     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
2106     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
2107     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
2108                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
2109     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
2110     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
2111                      true),
2112     DEFINE_PROP_END_OF_LIST(),
2113 };
2114
2115 static void virtio_net_class_init(ObjectClass *klass, void *data)
2116 {
2117     DeviceClass *dc = DEVICE_CLASS(klass);
2118     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2119
2120     dc->props = virtio_net_properties;
2121     dc->vmsd = &vmstate_virtio_net;
2122     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2123     vdc->realize = virtio_net_device_realize;
2124     vdc->unrealize = virtio_net_device_unrealize;
2125     vdc->get_config = virtio_net_get_config;
2126     vdc->set_config = virtio_net_set_config;
2127     vdc->get_features = virtio_net_get_features;
2128     vdc->set_features = virtio_net_set_features;
2129     vdc->bad_features = virtio_net_bad_features;
2130     vdc->reset = virtio_net_reset;
2131     vdc->set_status = virtio_net_set_status;
2132     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
2133     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
2134     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
2135     vdc->vmsd = &vmstate_virtio_net_device;
2136 }
2137
2138 static const TypeInfo virtio_net_info = {
2139     .name = TYPE_VIRTIO_NET,
2140     .parent = TYPE_VIRTIO_DEVICE,
2141     .instance_size = sizeof(VirtIONet),
2142     .instance_init = virtio_net_instance_init,
2143     .class_init = virtio_net_class_init,
2144 };
2145
2146 static void virtio_register_types(void)
2147 {
2148     type_register_static(&virtio_net_info);
2149 }
2150
2151 type_init(virtio_register_types)