#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
+#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
-#include "qapi/qmp/qjson.h"
-#include "qapi-event.h"
+#include "qapi/error.h"
+#include "qapi/qapi-events-net.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
+#include "standard-headers/linux/ethtool.h"
+#include "trace.h"
#define VIRTIO_NET_VM_VERSION 11
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
-/*
- * Calculate the number of bytes up to and including the given 'field' of
- * 'container'.
- */
-#define endof(container, field) \
- (offsetof(container, field) + sizeof(((container *)0)->field))
+#define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */
+
+#define VIRTIO_NET_TCP_FLAG 0x3F
+#define VIRTIO_NET_TCP_HDR_LENGTH 0xF000
+
+/* IPv4 max payload, 16 bits in the header */
+#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
+#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
+
+/* header length value in ip header without option */
+#define VIRTIO_NET_IP4_HEADER_LENGTH 5
+
+#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
+#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
+
+/* Purge coalesced packets timer interval. This value affects the performance
+ a lot and should be tuned carefully: '300000' (300us) is the recommended
+ value to pass the WHQL test, while '50000' can gain 2x netperf throughput
+ with tso/gso/gro 'off'. */
+#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
+
+/* temporary until standard header include it */
+#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)
-typedef struct VirtIOFeature {
- uint32_t flags;
- size_t end;
-} VirtIOFeature;
+#define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc_ext data in csum_ fields */
+#define VIRTIO_NET_F_RSC_EXT 61
+
+/*
+ * Return a pointer to the csum_start field of @hdr. When
+ * VIRTIO_NET_HDR_F_RSC_INFO is set, this file stores the number of
+ * coalesced packets there.
+ */
+static inline __virtio16 *virtio_net_rsc_ext_num_packets(
+ struct virtio_net_hdr *hdr)
+{
+ return &hdr->csum_start;
+}
+
+/*
+ * Return a pointer to the csum_offset field of @hdr. When
+ * VIRTIO_NET_HDR_F_RSC_INFO is set, this file stores the duplicated-ACK
+ * count there.
+ */
+static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
+ struct virtio_net_hdr *hdr)
+{
+ return &hdr->csum_offset;
+}
+
+#endif
static VirtIOFeature feature_sizes[] = {
- {.flags = 1 << VIRTIO_NET_F_MAC,
- .end = endof(struct virtio_net_config, mac)},
- {.flags = 1 << VIRTIO_NET_F_STATUS,
- .end = endof(struct virtio_net_config, status)},
- {.flags = 1 << VIRTIO_NET_F_MQ,
- .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
- {.flags = 1 << VIRTIO_NET_F_MTU,
- .end = endof(struct virtio_net_config, mtu)},
+ {.flags = 1ULL << VIRTIO_NET_F_MAC,
+ .end = virtio_endof(struct virtio_net_config, mac)},
+ {.flags = 1ULL << VIRTIO_NET_F_STATUS,
+ .end = virtio_endof(struct virtio_net_config, status)},
+ {.flags = 1ULL << VIRTIO_NET_F_MQ,
+ .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)},
+ {.flags = 1ULL << VIRTIO_NET_F_MTU,
+ .end = virtio_endof(struct virtio_net_config, mtu)},
+ {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
+ .end = virtio_endof(struct virtio_net_config, duplex)},
{}
};
virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
memcpy(netcfg.mac, n->mac, ETH_ALEN);
+ virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
+ netcfg.duplex = n->net_conf.duplex;
memcpy(config, &netcfg, n->config_size);
}
(n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}
+/*
+ * Set VIRTIO_NET_S_ANNOUNCE in the device status and call
+ * virtio_notify_config() so the change is signalled.
+ */
+static void virtio_net_announce_notify(VirtIONet *net)
+{
+    trace_virtio_net_announce_notify();
+
+    net->status |= VIRTIO_NET_S_ANNOUNCE;
+    virtio_notify_config(VIRTIO_DEVICE(net));
+}
+
+/*
+ * Announce timer callback (@opaque is the VirtIONet): trace the current
+ * round, consume one round and raise the announce notification.
+ */
static void virtio_net_announce_timer(void *opaque)
{
VirtIONet *n = opaque;
+ trace_virtio_net_announce_timer(n->announce_timer.round);
+
+ n->announce_timer.round--;
+ virtio_net_announce_notify(n);
+}
+
+/*
+ * NetClientInfo .announce handler. Raises the announce notification unless
+ * announce timer rounds are still pending, or unless the device lacks
+ * either VIRTIO_NET_F_GUEST_ANNOUNCE or VIRTIO_NET_F_CTRL_VQ.
+ */
+static void virtio_net_announce(NetClientState *nc)
+{
+ VirtIONet *n = qemu_get_nic_opaque(nc);
VirtIODevice *vdev = VIRTIO_DEVICE(n);
- n->announce_counter--;
- n->status |= VIRTIO_NET_S_ANNOUNCE;
- virtio_notify_config(vdev);
+ /*
+ * Make sure the virtio migration announcement timer isn't running.
+ * If it is, let it trigger the announcement so that we do not cause
+ * confusion.
+ */
+ if (n->announce_timer.round) {
+ return;
+ }
+
+ if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
+ virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
+ virtio_net_announce_notify(n);
+ }
}
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
qemu_bh_cancel(q->tx_bh);
}
if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
- (queue_status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
+ vdev->vm_running) {
/* if tx is waiting we are likely have some packets in tx queue
* and disabled notification */
q->tx_waiting = 0;
if (nc->rxfilter_notify_enabled) {
gchar *path = object_get_canonical_path(OBJECT(n->qdev));
qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
- n->netclient_name, path, &error_abort);
+ n->netclient_name, path);
g_free(path);
/* disable event notification to avoid events flooding */
static void virtio_net_reset(VirtIODevice *vdev)
{
VirtIONet *n = VIRTIO_NET(vdev);
+ int i;
/* Reset back to compatibility mode */
n->promisc = 1;
n->nobcast = 0;
/* multiqueue is disabled by default */
n->curr_queues = 1;
- timer_del(n->announce_timer);
- n->announce_counter = 0;
+ timer_del(n->announce_timer.tm);
+ n->announce_timer.round = 0;
n->status &= ~VIRTIO_NET_S_ANNOUNCE;
/* Flush any MAC and VLAN filter table state */
memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
memset(n->vlans, 0, MAX_VLAN >> 3);
+
+ /* Flush any async TX */
+ for (i = 0; i < n->max_queues; i++) {
+ NetClientState *nc = qemu_get_subqueue(n->nic, i);
+
+ if (nc->peer) {
+ qemu_flush_or_purge_queued_packets(nc->peer, true);
+ assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
+ }
+ }
}
static void peer_test_vnet_hdr(VirtIONet *n)
if (!get_vhost_net(nc->peer)) {
return features;
}
+
features = vhost_net_get_features(get_vhost_net(nc->peer), features);
vdev->backend_features = features;
virtio_has_feature(features,
VIRTIO_F_VERSION_1));
+ n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
+ virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
+ n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
+ virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
+
if (n->has_vnet_hdr) {
n->curr_guest_offloads =
virtio_net_guest_offloads_by_features(features);
if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
uint64_t supported_offloads;
+ offloads = virtio_ldq_p(vdev, &offloads);
+
if (!n->has_vnet_hdr) {
return VIRTIO_NET_ERR;
}
+ n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
+ virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
+ n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
+ virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
+ virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
+
supported_offloads = virtio_net_supported_guest_offloads(n);
if (offloads & ~supported_offloads) {
return VIRTIO_NET_ERR;
static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
struct iovec *iov, unsigned int iov_cnt)
{
+ trace_virtio_net_handle_announce(n->announce_timer.round);
if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
n->status & VIRTIO_NET_S_ANNOUNCE) {
n->status &= ~VIRTIO_NET_S_ANNOUNCE;
- if (n->announce_counter) {
- timer_mod(n->announce_timer,
- qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
- self_announce_delay(n->announce_counter));
+ if (n->announce_timer.round) {
+ qemu_announce_timer_step(&n->announce_timer);
}
return VIRTIO_NET_OK;
} else {
return size;
}
-static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
+static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
size_t size)
{
ssize_t r;
return r;
}
+/*
+ * Fill @unit with pointers into the IPv4 and TCP headers of the frame in
+ * @buf, plus the derived TCP header length and payload length.
+ *
+ * The IP header is taken to start right after the guest virtio-net header
+ * and the ethernet header. Nothing is bounds-checked here.
+ */
+static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
+                                         const uint8_t *buf,
+                                         VirtioNetRscUnit *unit)
+{
+    struct ip_header *ip4;
+    uint16_t l3_len;
+    uint16_t th_word;
+
+    ip4 = (struct ip_header *)(buf + chain->n->guest_hdr_len
+                               + sizeof(struct eth_header));
+    /* low nibble of ip_ver_len, scaled by 4 to get bytes */
+    l3_len = (ip4->ip_ver_len & 0xF) << 2;
+
+    unit->ip = (void *)ip4;
+    unit->ip_plen = &ip4->ip_len;
+    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + l3_len);
+
+    /* top nibble of th_offset_flags, scaled by 4: >> 12 then << 2 */
+    th_word = htons(unit->tcp->th_offset_flags);
+    unit->tcp_hdrlen = (th_word & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
+    unit->payload = htons(*unit->ip_plen) - l3_len - unit->tcp_hdrlen;
+}
+
+/*
+ * Fill @unit with pointers into the IPv6 and TCP headers of the frame in
+ * @buf, plus the derived TCP header length and payload length.
+ *
+ * The TCP header is taken to follow the fixed-size IPv6 header directly.
+ * Nothing is bounds-checked here.
+ */
+static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
+                                         const uint8_t *buf,
+                                         VirtioNetRscUnit *unit)
+{
+    struct ip6_header *hdr6;
+    uint16_t th_word;
+
+    hdr6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
+                                 + sizeof(struct eth_header));
+    unit->ip = hdr6;
+    unit->ip_plen = &(hdr6->ip6_ctlun.ip6_un1.ip6_un1_plen);
+    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
+                                      + sizeof(struct ip6_header));
+
+    th_word = htons(unit->tcp->th_offset_flags);
+    unit->tcp_hdrlen = (th_word & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
+
+    /* Unlike IPv4, the IPv6 payload length field excludes the IP header,
+       so only the TCP header length is subtracted */
+    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
+}
+
+/*
+ * Hand a cached segment to virtio_net_do_receive(), then unlink it from
+ * @chain and free it.
+ *
+ * The virtio-net header at the start of the segment buffer is rewritten
+ * first: a coalesced segment gets the RSC info flag, the packet and
+ * dup-ack counters and a TCPv4/TCPv6 GSO type; any other segment gets
+ * flags 0 and GSO_NONE.
+ *
+ * Returns the result of virtio_net_do_receive(), converted to size_t.
+ * The segment is released regardless of that result.
+ */
+static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
+                                       VirtioNetRscSeg *seg)
+{
+    struct virtio_net_hdr *vhdr = (struct virtio_net_hdr *)seg->buf;
+    int rc;
+
+    if (seg->is_coalesced) {
+        *virtio_net_rsc_ext_num_packets(vhdr) = seg->packets;
+        *virtio_net_rsc_ext_num_dupacks(vhdr) = seg->dup_ack;
+        vhdr->flags = VIRTIO_NET_HDR_F_RSC_INFO;
+        vhdr->gso_type = (chain->proto == ETH_P_IP)
+                         ? VIRTIO_NET_HDR_GSO_TCPV4
+                         : VIRTIO_NET_HDR_GSO_TCPV6;
+    } else {
+        vhdr->flags = 0;
+        vhdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+    }
+
+    rc = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
+
+    QTAILQ_REMOVE(&chain->buffers, seg, next);
+    g_free(seg->buf);
+    g_free(seg);
+
+    return rc;
+}
+
+/*
+ * Drain timer callback (@opq is the VirtioNetRscChain): drain every cached
+ * segment of the chain, counting the ones whose delivery returned 0.
+ * The timer is re-armed only if the chain still holds segments afterwards.
+ */
+static void virtio_net_rsc_purge(void *opq)
+{
+    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
+    VirtioNetRscSeg *cur, *tmp;
+
+    QTAILQ_FOREACH_SAFE(cur, &chain->buffers, next, tmp) {
+        if (virtio_net_rsc_drain_seg(chain, cur) == 0) {
+            chain->stat.purge_failed++;
+        }
+    }
+
+    chain->stat.timer++;
+    if (QTAILQ_EMPTY(&chain->buffers)) {
+        return;
+    }
+
+    timer_mod(chain->drain_timer,
+              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
+}
+
+/*
+ * Release every RSC chain of @n: free all cached segments without
+ * delivering them, then delete and free the chain's drain timer and the
+ * chain itself.
+ */
+static void virtio_net_rsc_cleanup(VirtIONet *n)
+{
+    VirtioNetRscChain *ch, *ch_next;
+    VirtioNetRscSeg *sg, *sg_next;
+
+    QTAILQ_FOREACH_SAFE(ch, &n->rsc_chains, next, ch_next) {
+        /* drop whatever is still cached on this chain */
+        QTAILQ_FOREACH_SAFE(sg, &ch->buffers, next, sg_next) {
+            QTAILQ_REMOVE(&ch->buffers, sg, next);
+            g_free(sg->buf);
+            g_free(sg);
+        }
+
+        QTAILQ_REMOVE(&n->rsc_chains, ch, next);
+        timer_del(ch->drain_timer);
+        timer_free(ch->drain_timer);
+        g_free(ch);
+    }
+}
+
+/*
+ * Copy the frame in @buf (@size bytes) into a new segment appended to
+ * @chain, and parse its IP/TCP headers into the segment's unit.
+ *
+ * The segment buffer is sized for the guest header, an ethernet header, an
+ * IPv6 header and VIRTIO_NET_MAX_TCP_PAYLOAD bytes, leaving room for later
+ * coalescing.
+ * NOTE(review): @size is copied without being compared against that
+ * capacity -- confirm callers cannot pass a larger frame.
+ */
+static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
+                                     NetClientState *nc,
+                                     const uint8_t *buf, size_t size)
+{
+    VirtioNetRscSeg *entry;
+    uint16_t vnet_len = chain->n->guest_hdr_len;
+
+    entry = g_malloc(sizeof(VirtioNetRscSeg));
+    entry->buf = g_malloc(vnet_len + sizeof(struct eth_header)
+                          + sizeof(struct ip6_header)
+                          + VIRTIO_NET_MAX_TCP_PAYLOAD);
+    memcpy(entry->buf, buf, size);
+
+    entry->nc = nc;
+    entry->size = size;
+    entry->packets = 1;
+    entry->dup_ack = 0;
+    entry->is_coalesced = 0;
+
+    QTAILQ_INSERT_TAIL(&chain->buffers, entry, next);
+    chain->stat.cache++;
+
+    /* the unit must point into the cached copy, not into the caller's buf */
+    if (chain->proto == ETH_P_IP) {
+        virtio_net_rsc_extract_unit4(chain, entry->buf, &entry->unit);
+    } else if (chain->proto == ETH_P_IPV6) {
+        virtio_net_rsc_extract_unit6(chain, entry->buf, &entry->unit);
+    } else {
+        g_assert_not_reached();
+    }
+}
+
+/*
+ * Decide what to do with a new segment that carries the same sequence
+ * number as the cached one, based on the ACK number and window of the new
+ * (@n_tcp) and cached (@o_tcp) TCP headers.
+ *
+ * Returns RSC_COALESCE only for a window update (same ACK, different
+ * window), in which case the cached header takes the new window. Every
+ * other case returns RSC_FINAL. @seg and @buf are not used.
+ */
+static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
+                                         VirtioNetRscSeg *seg,
+                                         const uint8_t *buf,
+                                         struct tcp_header *n_tcp,
+                                         struct tcp_header *o_tcp)
+{
+    uint32_t new_ack = htonl(n_tcp->th_ack);
+    uint32_t old_ack = htonl(o_tcp->th_ack);
+    uint16_t new_win = htons(n_tcp->th_win);
+    uint16_t old_win = htons(o_tcp->th_win);
+
+    if ((new_ack - old_ack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
+        chain->stat.ack_out_of_win++;
+        return RSC_FINAL;
+    }
+
+    if (new_ack != old_ack) {
+        /* pure ack: finalize */
+        chain->stat.pure_ack++;
+        return RSC_FINAL;
+    }
+
+    /* same ack number: duplicated ack or window probe */
+    if (new_win == old_win) {
+        /* duplicated ack, add dup ack count due to whql test up to 1 */
+        chain->stat.dup_ack++;
+        return RSC_FINAL;
+    }
+
+    /* coalesce the window update into the cached header */
+    o_tcp->th_win = n_tcp->th_win;
+    chain->stat.win_update++;
+    return RSC_COALESCE;
+}
+
+/*
+ * Try to append the payload described by @n_unit to the cached segment
+ * @seg, which is already known to belong to the same flow.
+ *
+ * Returns RSC_COALESCE when the payload was appended (or when
+ * virtio_net_rsc_handle_ack() merged a window update), and RSC_FINAL when
+ * the new segment is out of window, out of order, or would push the IP
+ * length beyond the chain's max_payload.
+ */
+static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
+                                            VirtioNetRscSeg *seg,
+                                            const uint8_t *buf,
+                                            VirtioNetRscUnit *n_unit)
+{
+    VirtioNetRscUnit *cached = &seg->unit;
+    uint16_t cached_ip_len = htons(*cached->ip_plen);
+    uint32_t new_seq = htonl(n_unit->tcp->th_seq);
+    uint32_t old_seq = htonl(cached->tcp->th_seq);
+    void *new_data;
+
+    /* out of order or retransmitted. */
+    if ((new_seq - old_seq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
+        chain->stat.data_out_of_win++;
+        return RSC_FINAL;
+    }
+
+    if (new_seq == old_seq) {
+        if ((cached->payload != 0) || !n_unit->payload) {
+            return virtio_net_rsc_handle_ack(chain, seg, buf,
+                                             n_unit->tcp, cached->tcp);
+        }
+        /* From no payload to payload, normal case, not a dup ack or etc */
+        chain->stat.data_after_pure_ack++;
+    } else if ((new_seq - old_seq) != cached->payload) {
+        /* Not a consistent packet, out of order */
+        chain->stat.data_out_of_order++;
+        return RSC_FINAL;
+    }
+
+    /* From here on the new payload directly continues the cached data */
+    if ((cached_ip_len + n_unit->payload) > chain->max_payload) {
+        chain->stat.over_size++;
+        return RSC_FINAL;
+    }
+
+    /* The payload length field means different things in v4 and v6, so
+       track the data length separately and patch the header field from
+       its previous value */
+    cached->payload += n_unit->payload;
+    *cached->ip_plen = htons(cached_ip_len + n_unit->payload);
+
+    /* Carry over the newest flags word so that 'PUSH' is propagated; the
+       whql test guide says 'PUSH' can be coalesced for windows guest,
+       while this may change the behavior for linux guest (only if it uses
+       RSC feature). */
+    cached->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
+    cached->tcp->th_ack = n_unit->tcp->th_ack;
+    cached->tcp->th_win = n_unit->tcp->th_win;
+
+    /* append the new TCP payload right after the cached bytes */
+    new_data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
+    memmove(seg->buf + seg->size, new_data, n_unit->payload);
+    seg->size += n_unit->payload;
+    seg->packets++;
+    chain->stat.coalesced++;
+    return RSC_COALESCE;
+}
+
+/*
+ * IPv4 flow match: if the source/destination addresses and TCP ports in
+ * @unit equal those of the cached segment @seg, try to coalesce the data;
+ * otherwise count a miss and return RSC_NO_MATCH. @size is not used.
+ */
+static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
+                                        VirtioNetRscSeg *seg,
+                                        const uint8_t *buf, size_t size,
+                                        VirtioNetRscUnit *unit)
+{
+    struct ip_header *new_ip = (struct ip_header *)(unit->ip);
+    struct ip_header *old_ip = (struct ip_header *)(seg->unit.ip);
+
+    if (new_ip->ip_src != old_ip->ip_src
+        || new_ip->ip_dst != old_ip->ip_dst
+        || unit->tcp->th_sport != seg->unit.tcp->th_sport
+        || unit->tcp->th_dport != seg->unit.tcp->th_dport) {
+        chain->stat.no_match++;
+        return RSC_NO_MATCH;
+    }
+
+    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
+}
+
+/*
+ * IPv6 flow match: if the source/destination addresses and TCP ports in
+ * @unit equal those of the cached segment @seg, try to coalesce the data;
+ * otherwise count a miss and return RSC_NO_MATCH. @size is not used.
+ */
+static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
+                                        VirtioNetRscSeg *seg,
+                                        const uint8_t *buf, size_t size,
+                                        VirtioNetRscUnit *unit)
+{
+    struct ip6_header *new_ip = (struct ip6_header *)(unit->ip);
+    struct ip6_header *old_ip = (struct ip6_header *)(seg->unit.ip);
+
+    if (memcmp(&new_ip->ip6_src, &old_ip->ip6_src,
+               sizeof(struct in6_address)) != 0
+        || memcmp(&new_ip->ip6_dst, &old_ip->ip6_dst,
+                  sizeof(struct in6_address)) != 0
+        || unit->tcp->th_sport != seg->unit.tcp->th_sport
+        || unit->tcp->th_dport != seg->unit.tcp->th_dport) {
+        chain->stat.no_match++;
+        return RSC_NO_MATCH;
+    }
+
+    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
+}
+
+/*
+ * Classify a TCP header for RSC.
+ *
+ * Packets with 'SYN' should bypass; other control flags, and any header
+ * longer than the basic TCP header (i.e. carrying options), should be sent
+ * after the flow is drained, to prevent out-of-order delivery.
+ *
+ * Returns RSC_BYPASS, RSC_FINAL or RSC_CANDIDATE accordingly.
+ */
+static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
+                                         struct tcp_header *tcp)
+{
+    /* Convert the offset/flags word once and derive both values from it */
+    uint16_t word = htons(tcp->th_offset_flags);
+    uint16_t tcp_hdr = (word & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
+    uint16_t tcp_flag = word & VIRTIO_NET_TCP_FLAG;
+
+    if (tcp_flag & TH_SYN) {
+        chain->stat.tcp_syn++;
+        return RSC_BYPASS;
+    }
+
+    /* NOTE(review): tcp_flag is masked with VIRTIO_NET_TCP_FLAG (0x3F), so
+       the TH_ECE/TH_CWR tests can only fire if those constants lie within
+       the low six bits -- confirm against their definitions. */
+    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
+        chain->stat.tcp_ctrl_drain++;
+        return RSC_FINAL;
+    }
+
+    if (tcp_hdr > sizeof(struct tcp_header)) {
+        chain->stat.tcp_all_opt++;
+        return RSC_FINAL;
+    }
+
+    return RSC_CANDIDATE;
+}
+
+/*
+ * Run a candidate frame against the segments cached on @chain.
+ *
+ * - Empty cache: cache the frame and arm the drain timer.
+ * - A cached segment of the same flow accepts the data: mark it coalesced.
+ * - A cached segment of the same flow rejects it (RSC_FINAL): drain that
+ *   segment, then deliver the frame uncoalesced.
+ * - No segment matches: cache the frame as a new segment.
+ *
+ * Returns @size when the frame was cached or coalesced, 0 when draining
+ * the matching segment returned 0, otherwise the result of
+ * virtio_net_do_receive().
+ */
+static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
+                                         NetClientState *nc,
+                                         const uint8_t *buf, size_t size,
+                                         VirtioNetRscUnit *unit)
+{
+    VirtioNetRscSeg *cur, *tmp;
+    int verdict;
+
+    if (QTAILQ_EMPTY(&chain->buffers)) {
+        chain->stat.empty_cache++;
+        virtio_net_rsc_cache_buf(chain, nc, buf, size);
+        timer_mod(chain->drain_timer,
+                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
+        return size;
+    }
+
+    QTAILQ_FOREACH_SAFE(cur, &chain->buffers, next, tmp) {
+        verdict = (chain->proto == ETH_P_IP)
+                  ? virtio_net_rsc_coalesce4(chain, cur, buf, size, unit)
+                  : virtio_net_rsc_coalesce6(chain, cur, buf, size, unit);
+
+        if (verdict == RSC_NO_MATCH) {
+            continue;
+        }
+
+        if (verdict != RSC_FINAL) {
+            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
+            cur->is_coalesced = 1;
+            return size;
+        }
+
+        if (virtio_net_rsc_drain_seg(chain, cur) == 0) {
+            /* Send failed */
+            chain->stat.final_failed++;
+            return 0;
+        }
+
+        /* Send current packet */
+        return virtio_net_do_receive(nc, buf, size);
+    }
+
+    chain->stat.no_match_cache++;
+    virtio_net_rsc_cache_buf(chain, nc, buf, size);
+    return size;
+}
+
+/*
+ * Drain a connection's cached data before delivering @buf, to avoid
+ * out-of-order segments.
+ *
+ * @ip_start/@ip_size: offset and length of the address pair inside the
+ *                     frame, compared byte-wise against each cached frame.
+ * @tcp_port:          offset of the four bytes holding the TCP source and
+ *                     destination ports.
+ *
+ * Only the first cached segment of the same flow is drained. The frame in
+ * @buf is then always passed to virtio_net_do_receive(), whose result is
+ * returned.
+ */
+static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
+                                        NetClientState *nc,
+                                        const uint8_t *buf, size_t size,
+                                        uint16_t ip_start, uint16_t ip_size,
+                                        uint16_t tcp_port)
+{
+    VirtioNetRscSeg *seg, *nseg;
+
+    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
+        /*
+         * Compare the port pair with memcmp() rather than loading it
+         * through a uint32_t pointer: the offset into the frame carries no
+         * alignment guarantee and the buffers are plain byte arrays.
+         */
+        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
+            || memcmp(buf + tcp_port, seg->buf + tcp_port,
+                      sizeof(uint32_t))) {
+            continue;
+        }
+        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
+            chain->stat.drain_failed++;
+        }
+
+        break;
+    }
+
+    return virtio_net_do_receive(nc, buf, size);
+}
+
+/*
+ * Check whether an IPv4 header qualifies for RSC.
+ *
+ * Returns RSC_BYPASS (after bumping the matching statistic) when the
+ * packet is not IPv4, has IP options, is not TCP, lacks the DF bit, has
+ * ECN bits set, or has a total length inconsistent with @size; otherwise
+ * returns RSC_CANDIDATE.
+ *
+ * Note that a wrong IP version is accounted under stat.ip_option, the same
+ * counter as IP options.
+ */
+static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
+                                            struct ip_header *ip,
+                                            const uint8_t *buf, size_t size)
+{
+    uint8_t version = (ip->ip_ver_len & 0xF0) >> 4;
+    uint8_t words = ip->ip_ver_len & 0xF;
+    uint16_t total_len;
+
+    /* Not an ipv4 packet */
+    if (version != IP_HEADER_VERSION_4) {
+        chain->stat.ip_option++;
+        return RSC_BYPASS;
+    }
+
+    /* Don't handle packets with ip option */
+    if (words != VIRTIO_NET_IP4_HEADER_LENGTH) {
+        chain->stat.ip_option++;
+        return RSC_BYPASS;
+    }
+
+    if (ip->ip_p != IPPROTO_TCP) {
+        chain->stat.bypass_not_tcp++;
+        return RSC_BYPASS;
+    }
+
+    /* Only packets with the don't-fragment bit set are handled */
+    if ((htons(ip->ip_off) & IP_DF) == 0) {
+        chain->stat.ip_frag++;
+        return RSC_BYPASS;
+    }
+
+    /* Don't handle packets with ecn flag */
+    if (IPTOS_ECN(ip->ip_tos)) {
+        chain->stat.ip_ecn++;
+        return RSC_BYPASS;
+    }
+
+    /* The claimed length must cover IP + TCP headers and fit in the frame */
+    total_len = htons(ip->ip_len);
+    if (total_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
+        || total_len > (size - chain->n->guest_hdr_len -
+                        sizeof(struct eth_header))) {
+        chain->stat.ip_hacked++;
+        return RSC_BYPASS;
+    }
+
+    return RSC_CANDIDATE;
+}
+
+/*
+ * RSC receive path for IPv4 frames.
+ *
+ * Frames too short to hold ethernet + IPv4 + TCP headers, frames failing
+ * the sanity check and SYN packets are delivered directly. Packets
+ * classified RSC_FINAL drain their flow first. Everything else goes to the
+ * coalescing logic.
+ */
+static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
+                                      NetClientState *nc,
+                                      const uint8_t *buf, size_t size)
+{
+    VirtioNetRscUnit unit;
+    uint16_t hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
+    const size_t ip_off = hdr_len + sizeof(struct eth_header);
+    int32_t verdict;
+
+    if (size < (ip_off + sizeof(struct ip_header)
+                + sizeof(struct tcp_header))) {
+        chain->stat.bypass_not_tcp++;
+        return virtio_net_do_receive(nc, buf, size);
+    }
+
+    virtio_net_rsc_extract_unit4(chain, buf, &unit);
+    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
+        != RSC_CANDIDATE) {
+        return virtio_net_do_receive(nc, buf, size);
+    }
+
+    verdict = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
+    if (verdict == RSC_BYPASS) {
+        return virtio_net_do_receive(nc, buf, size);
+    }
+    if (verdict == RSC_FINAL) {
+        /* address pair starts 12 bytes into the IP header; the TCP ports
+           follow the (option-less) IP header */
+        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
+                                         ip_off + 12,
+                                         VIRTIO_NET_IP4_ADDR_SIZE,
+                                         ip_off + sizeof(struct ip_header));
+    }
+
+    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
+}
+
+/*
+ * Check whether an IPv6 header qualifies for RSC.
+ *
+ * Returns RSC_BYPASS when the version nibble is not 6, the next header is
+ * not TCP (which also rejects extension headers), the payload length is
+ * inconsistent with @size, or ECN bits are set; otherwise RSC_CANDIDATE.
+ *
+ * NOTE(review): the version test masks the low byte of the 32-bit
+ * ip6_un1_flow word as loaded by the host -- confirm this picks the first
+ * header byte on big-endian hosts as well.
+ */
+static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
+                                            struct ip6_header *ip6,
+                                            const uint8_t *buf, size_t size)
+{
+    uint16_t plen;
+
+    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
+        != IP_HEADER_VERSION_6) {
+        return RSC_BYPASS;
+    }
+
+    /* Both option and protocol is checked in this */
+    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
+        chain->stat.bypass_not_tcp++;
+        return RSC_BYPASS;
+    }
+
+    /* The payload must hold a TCP header and fit in what was received */
+    plen = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
+    if (plen < sizeof(struct tcp_header)
+        || plen > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
+                   - sizeof(struct ip6_header))) {
+        chain->stat.ip_hacked++;
+        return RSC_BYPASS;
+    }
+
+    /* Don't handle packets with ecn flag */
+    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
+        chain->stat.ip_ecn++;
+        return RSC_BYPASS;
+    }
+
+    return RSC_CANDIDATE;
+}
+
+/*
+ * RSC receive path for IPv6 frames; @opq is the VirtioNetRscChain.
+ *
+ * Frames too short to hold ethernet + IPv6 + TCP headers, frames failing
+ * the sanity check and SYN packets are delivered directly. Packets
+ * classified RSC_FINAL drain their flow first. Everything else goes to the
+ * coalescing logic.
+ */
+static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
+                                      const uint8_t *buf, size_t size)
+{
+    int32_t ret;
+    uint16_t hdr_len;
+    VirtioNetRscChain *chain;
+    VirtioNetRscUnit unit;
+
+    chain = (VirtioNetRscChain *)opq;
+    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
+
+    /* Spell the type as 'struct tcp_header', like every other size check
+       in the RSC code */
+    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
+        + sizeof(struct tcp_header))) {
+        return virtio_net_do_receive(nc, buf, size);
+    }
+
+    virtio_net_rsc_extract_unit6(chain, buf, &unit);
+    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
+                                                 unit.ip, buf, size)) {
+        return virtio_net_do_receive(nc, buf, size);
+    }
+
+    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
+    if (ret == RSC_BYPASS) {
+        return virtio_net_do_receive(nc, buf, size);
+    } else if (ret == RSC_FINAL) {
+        /* address pair starts 8 bytes into the IPv6 header; the TCP ports
+           follow the fixed IPv6 header */
+        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
+                ((hdr_len + sizeof(struct eth_header)) + 8),
+                VIRTIO_NET_IP6_ADDR_SIZE,
+                hdr_len + sizeof(struct eth_header)
+                + sizeof(struct ip6_header));
+    }
+
+    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
+}
+
+/*
+ * Return the RSC chain of @n for ethertype @proto, creating it on first
+ * use. Only ETH_P_IP and ETH_P_IPV6 have chains; any other @proto yields
+ * NULL. @nc is not used.
+ *
+ * A new chain gets its own drain timer, zeroed statistics and an empty
+ * segment list, and is appended to n->rsc_chains.
+ */
+static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
+                                                      NetClientState *nc,
+                                                      uint16_t proto)
+{
+    VirtioNetRscChain *found;
+    VirtioNetRscChain *fresh;
+
+    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
+        return NULL;
+    }
+
+    QTAILQ_FOREACH(found, &n->rsc_chains, next) {
+        if (found->proto == proto) {
+            return found;
+        }
+    }
+
+    /* first frame of this protocol: set up a new chain */
+    fresh = g_malloc(sizeof(*fresh));
+    memset(&fresh->stat, 0, sizeof(fresh->stat));
+    fresh->n = n;
+    fresh->proto = proto;
+    if (proto == (uint16_t)ETH_P_IP) {
+        fresh->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
+        fresh->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+    } else {
+        fresh->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
+        fresh->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+    }
+    fresh->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
+                                      virtio_net_rsc_purge, fresh);
+
+    QTAILQ_INIT(&fresh->buffers);
+    QTAILQ_INSERT_TAIL(&n->rsc_chains, fresh, next);
+
+    return fresh;
+}
+
+/*
+ * RSC entry point: dispatch an incoming frame to the IPv4 or IPv6
+ * coalescing path when RSC is enabled for that protocol, otherwise deliver
+ * it directly through virtio_net_do_receive().
+ */
+static ssize_t virtio_net_rsc_receive(NetClientState *nc,
+                                      const uint8_t *buf,
+                                      size_t size)
+{
+    uint16_t proto;
+    VirtioNetRscChain *chain;
+    struct eth_header *eth;
+    VirtIONet *n;
+
+    n = qemu_get_nic_opaque(nc);
+    /*
+     * The ethernet header is read at offset guest_hdr_len below, so the
+     * frame must be long enough for that offset too, not only for
+     * host_hdr_len.
+     */
+    if (size < (n->host_hdr_len + sizeof(struct eth_header)) ||
+        size < (n->guest_hdr_len + sizeof(struct eth_header))) {
+        return virtio_net_do_receive(nc, buf, size);
+    }
+
+    eth = (struct eth_header *)(buf + n->guest_hdr_len);
+    proto = htons(eth->h_proto);
+
+    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
+    if (chain) {
+        chain->stat.received++;
+        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
+            return virtio_net_rsc_receive4(chain, nc, buf, size);
+        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
+            return virtio_net_rsc_receive6(chain, nc, buf, size);
+        }
+    }
+    return virtio_net_do_receive(nc, buf, size);
+}
+
+/*
+ * NetClientInfo .receive handler: go through the RSC path when RSC is
+ * enabled for IPv4 or IPv6, otherwise deliver the frame directly.
+ */
+static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
+                                  size_t size)
+{
+    VirtIONet *n = qemu_get_nic_opaque(nc);
+
+    if (!n->rsc4_enabled && !n->rsc6_enabled) {
+        return virtio_net_do_receive(nc, buf, size);
+    }
+
+    return virtio_net_rsc_receive(nc, buf, size);
+}
+
static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
n->guest_hdr_len, -1);
if (out_num == VIRTQUEUE_MAX_SIZE) {
goto drop;
- }
+ }
out_num += 1;
out_sg = sg2;
- }
+ }
}
/*
* If host wants to see the guest header as is, we can
VirtIODevice *vdev = VIRTIO_DEVICE(n);
int i, link_down;
+ trace_virtio_net_post_load_device();
virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
virtio_vdev_has_feature(vdev,
VIRTIO_F_VERSION_1));
if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
- n->announce_counter = SELF_ANNOUNCE_ROUNDS;
- timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
+ qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
+ QEMU_CLOCK_VIRTUAL,
+ virtio_net_announce_timer, n);
+ if (n->announce_timer.round) {
+ timer_mod(n->announce_timer.tm,
+ qemu_clock_get_ms(n->announce_timer.type));
+ } else {
+ qemu_announce_timer_del(&n->announce_timer);
+ }
}
return 0;
* pointer and count and also validate the count.
*/
-static void virtio_net_tx_waiting_pre_save(void *opaque)
+static int virtio_net_tx_waiting_pre_save(void *opaque)
{
struct VirtIONetMigTmp *tmp = opaque;
if (tmp->parent->curr_queues == 0) {
tmp->curr_queues_1 = 0;
}
+
+ return 0;
}
static int virtio_net_tx_waiting_pre_load(void *opaque)
return 0;
}
+/*
+ * Copy the parent's has_ufo flag into the temporary migration struct
+ * @opaque. Always returns 0.
+ */
-static void virtio_net_ufo_pre_save(void *opaque)
+static int virtio_net_ufo_pre_save(void *opaque)
{
struct VirtIONetMigTmp *tmp = opaque;
tmp->has_ufo = tmp->parent->has_ufo;
+
+ return 0;
}
static const VMStateDescription vmstate_virtio_net_has_ufo = {
return 0;
}
+/*
+ * Copy the parent's has_vnet_hdr flag into the temporary migration struct
+ * @opaque. Always returns 0.
+ */
-static void virtio_net_vnet_pre_save(void *opaque)
+static int virtio_net_vnet_pre_save(void *opaque)
{
struct VirtIONetMigTmp *tmp = opaque;
tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
+
+ return 0;
}
static const VMStateDescription vmstate_virtio_net_has_vnet = {
.receive = virtio_net_receive,
.link_status_changed = virtio_net_set_link_status,
.query_rx_filter = virtio_net_query_rxfilter,
+ .announce = virtio_net_announce,
};
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
+/*
+ * Set n->config_size from @host_features using the feature_sizes table.
+ * VIRTIO_NET_F_MAC is always added to the (local copy of the) feature set
+ * before the lookup.
+ */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
- int i, config_size = 0;
virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
- for (i = 0; feature_sizes[i].flags != 0; i++) {
- if (host_features & feature_sizes[i].flags) {
- config_size = MAX(feature_sizes[i].end, config_size);
- }
- }
- n->config_size = config_size;
+ n->config_size = virtio_feature_get_config_size(feature_sizes,
+ host_features);
}
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
int i;
if (n->net_conf.mtu) {
- n->host_features |= (0x1 << VIRTIO_NET_F_MTU);
+ n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
+ }
+
+ /*
+ * Validate the 'duplex' and 'speed' properties. Stop at the first
+ * invalid value: continuing after error_setg() would carry on with a
+ * failed device, and a second error_setg() on the same errp is not
+ * allowed.
+ */
+ if (n->net_conf.duplex_str) {
+ if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
+ n->net_conf.duplex = DUPLEX_HALF;
+ } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
+ n->net_conf.duplex = DUPLEX_FULL;
+ } else {
+ error_setg(errp, "'duplex' must be 'half' or 'full'");
+ return;
+ }
+ n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
+ } else {
+ n->net_conf.duplex = DUPLEX_UNKNOWN;
+ }
+
+ if (n->net_conf.speed < SPEED_UNKNOWN) {
+ error_setg(errp, "'speed' must be between 0 and INT_MAX");
+ return;
+ }
+ if (n->net_conf.speed >= 0) {
+ n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
}
virtio_net_set_config_size(n, n->host_features);
*/
if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
- (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) {
+ !is_power_of_2(n->net_conf.rx_queue_size)) {
error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
"must be a power of 2 between %d and %d.",
n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
&& strcmp(n->net_conf.tx, "bh")) {
- error_report("virtio-net: "
- "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
- n->net_conf.tx);
- error_report("Defaulting to \"bh\"");
+ warn_report("virtio-net: "
+ "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
+ n->net_conf.tx);
+ error_printf("Defaulting to \"bh\"");
}
n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
n->status = VIRTIO_NET_S_LINK_UP;
- n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
- virtio_net_announce_timer, n);
+ qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
+ QEMU_CLOCK_VIRTUAL,
+ virtio_net_announce_timer, n);
+ n->announce_timer.round = 0;
if (n->netclient_type) {
/*
nc = qemu_get_queue(n->nic);
nc->rxfilter_notify_enabled = 1;
+ QTAILQ_INIT(&n->rsc_chains);
n->qdev = dev;
}
virtio_net_del_queue(n, i);
}
- timer_del(n->announce_timer);
- timer_free(n->announce_timer);
+ qemu_announce_timer_del(&n->announce_timer);
g_free(n->vqs);
qemu_del_nic(n->nic);
+ virtio_net_rsc_cleanup(n);
virtio_cleanup(vdev);
}
DEVICE(n), NULL);
}
+/*
+ * Pre-save check for the device state: asserts that vhost is not started
+ * on @opaque (the VirtIONet). Always returns 0.
+ */
-static void virtio_net_pre_save(void *opaque)
+static int virtio_net_pre_save(void *opaque)
{
VirtIONet *n = opaque;
/* At this point, backend must be stopped, otherwise
* it might keep writing to memory. */
assert(!n->vhost_started);
+
+ return 0;
}
static const VMStateDescription vmstate_virtio_net = {
};
static Property virtio_net_properties[] = {
- DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true),
- DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
+ VIRTIO_NET_F_CSUM, true),
+ DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_CSUM, true),
- DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
- DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
+ DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_TSO4, true),
- DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_TSO6, true),
- DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_ECN, true),
- DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_UFO, true),
- DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
VIRTIO_NET_F_GUEST_ANNOUNCE, true),
- DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
VIRTIO_NET_F_HOST_TSO4, true),
- DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
VIRTIO_NET_F_HOST_TSO6, true),
- DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
VIRTIO_NET_F_HOST_ECN, true),
- DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
VIRTIO_NET_F_HOST_UFO, true),
- DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
VIRTIO_NET_F_MRG_RXBUF, true),
- DEFINE_PROP_BIT("status", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("status", VirtIONet, host_features,
VIRTIO_NET_F_STATUS, true),
- DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_VQ, true),
- DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_RX, true),
- DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_VLAN, true),
- DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_RX_EXTRA, true),
- DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_MAC_ADDR, true),
- DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features,
+ DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
- DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
+ DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
+ DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
+ VIRTIO_NET_F_RSC_EXT, false),
+ DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
+ VIRTIO_NET_RSC_DEFAULT_INTERVAL),
DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
TX_TIMER_INTERVAL),
DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
true),
+ DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
+ DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
DEFINE_PROP_END_OF_LIST(),
};