/*
 * Network-device interface management.
 *
 * Copyright (c) 2004-2005, Keir Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
33 #include <linux/kthread.h>
34 #include <linux/ethtool.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/if_vlan.h>
37 #include <linux/vmalloc.h>
39 #include <xen/events.h>
40 #include <asm/xen/hypercall.h>
41 #include <xen/balloon.h>
43 #define XENVIF_QUEUE_LENGTH 32
44 #define XENVIF_NAPI_WEIGHT 64
46 /* Number of bytes allowed on the internal guest Rx queue. */
47 #define XENVIF_RX_QUEUE_BYTES (XEN_NETIF_RX_RING_SIZE/2 * PAGE_SIZE)
/* This function sets SKBTX_DEV_ZEROCOPY and increments the inflight
 * counter. The counter must be incremented here because the core driver
 * calls into xenvif_zerocopy_callback, which calls
 * xenvif_skb_zerocopy_complete.
 */
54 void xenvif_skb_zerocopy_prepare(struct xenvif_queue
*queue
,
57 skb_shinfo(skb
)->tx_flags
|= SKBTX_DEV_ZEROCOPY
;
58 atomic_inc(&queue
->inflight_packets
);
61 void xenvif_skb_zerocopy_complete(struct xenvif_queue
*queue
)
63 atomic_dec(&queue
->inflight_packets
);
65 /* Wake the dealloc thread _after_ decrementing inflight_packets so
66 * that if kthread_stop() has already been called, the dealloc thread
67 * does not wait forever with nothing to wake it.
69 wake_up(&queue
->dealloc_wq
);
72 int xenvif_schedulable(struct xenvif
*vif
)
74 return netif_running(vif
->dev
) &&
75 test_bit(VIF_STATUS_CONNECTED
, &vif
->status
) &&
79 static irqreturn_t
xenvif_tx_interrupt(int irq
, void *dev_id
)
81 struct xenvif_queue
*queue
= dev_id
;
83 if (RING_HAS_UNCONSUMED_REQUESTS(&queue
->tx
))
84 napi_schedule(&queue
->napi
);
89 static int xenvif_poll(struct napi_struct
*napi
, int budget
)
91 struct xenvif_queue
*queue
=
92 container_of(napi
, struct xenvif_queue
, napi
);
	/* This vif is rogue: we pretend there is nothing to do for it so
	 * that it is descheduled from NAPI. The interface itself will be
	 * turned off in thread context later.
	 */
99 if (unlikely(queue
->vif
->disabled
)) {
104 work_done
= xenvif_tx_action(queue
, budget
);
106 if (work_done
< budget
) {
108 xenvif_napi_schedule_or_enable_events(queue
);
114 static irqreturn_t
xenvif_rx_interrupt(int irq
, void *dev_id
)
116 struct xenvif_queue
*queue
= dev_id
;
118 xenvif_kick_thread(queue
);
123 irqreturn_t
xenvif_interrupt(int irq
, void *dev_id
)
125 xenvif_tx_interrupt(irq
, dev_id
);
126 xenvif_rx_interrupt(irq
, dev_id
);
131 int xenvif_queue_stopped(struct xenvif_queue
*queue
)
133 struct net_device
*dev
= queue
->vif
->dev
;
134 unsigned int id
= queue
->id
;
135 return netif_tx_queue_stopped(netdev_get_tx_queue(dev
, id
));
138 void xenvif_wake_queue(struct xenvif_queue
*queue
)
140 struct net_device
*dev
= queue
->vif
->dev
;
141 unsigned int id
= queue
->id
;
142 netif_tx_wake_queue(netdev_get_tx_queue(dev
, id
));
145 static int xenvif_start_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
147 struct xenvif
*vif
= netdev_priv(dev
);
148 struct xenvif_queue
*queue
= NULL
;
149 unsigned int num_queues
= vif
->num_queues
;
151 struct xenvif_rx_cb
*cb
;
153 BUG_ON(skb
->dev
!= dev
);
155 /* Drop the packet if queues are not set up */
159 /* Obtain the queue to be used to transmit this packet */
160 index
= skb_get_queue_mapping(skb
);
161 if (index
>= num_queues
) {
162 pr_warn_ratelimited("Invalid queue %hu for packet on interface %s\n.",
163 index
, vif
->dev
->name
);
166 queue
= &vif
->queues
[index
];
168 /* Drop the packet if queue is not ready */
169 if (queue
->task
== NULL
||
170 queue
->dealloc_task
== NULL
||
171 !xenvif_schedulable(vif
))
174 cb
= XENVIF_RX_CB(skb
);
175 cb
->expires
= jiffies
+ vif
->drain_timeout
;
177 xenvif_rx_queue_tail(queue
, skb
);
178 xenvif_kick_thread(queue
);
183 vif
->dev
->stats
.tx_dropped
++;
188 static struct net_device_stats
*xenvif_get_stats(struct net_device
*dev
)
190 struct xenvif
*vif
= netdev_priv(dev
);
191 struct xenvif_queue
*queue
= NULL
;
192 unsigned int num_queues
= vif
->num_queues
;
193 unsigned long rx_bytes
= 0;
194 unsigned long rx_packets
= 0;
195 unsigned long tx_bytes
= 0;
196 unsigned long tx_packets
= 0;
199 if (vif
->queues
== NULL
)
202 /* Aggregate tx and rx stats from each queue */
203 for (index
= 0; index
< num_queues
; ++index
) {
204 queue
= &vif
->queues
[index
];
205 rx_bytes
+= queue
->stats
.rx_bytes
;
206 rx_packets
+= queue
->stats
.rx_packets
;
207 tx_bytes
+= queue
->stats
.tx_bytes
;
208 tx_packets
+= queue
->stats
.tx_packets
;
212 vif
->dev
->stats
.rx_bytes
= rx_bytes
;
213 vif
->dev
->stats
.rx_packets
= rx_packets
;
214 vif
->dev
->stats
.tx_bytes
= tx_bytes
;
215 vif
->dev
->stats
.tx_packets
= tx_packets
;
217 return &vif
->dev
->stats
;
220 static void xenvif_up(struct xenvif
*vif
)
222 struct xenvif_queue
*queue
= NULL
;
223 unsigned int num_queues
= vif
->num_queues
;
224 unsigned int queue_index
;
226 for (queue_index
= 0; queue_index
< num_queues
; ++queue_index
) {
227 queue
= &vif
->queues
[queue_index
];
228 napi_enable(&queue
->napi
);
229 enable_irq(queue
->tx_irq
);
230 if (queue
->tx_irq
!= queue
->rx_irq
)
231 enable_irq(queue
->rx_irq
);
232 xenvif_napi_schedule_or_enable_events(queue
);
236 static void xenvif_down(struct xenvif
*vif
)
238 struct xenvif_queue
*queue
= NULL
;
239 unsigned int num_queues
= vif
->num_queues
;
240 unsigned int queue_index
;
242 for (queue_index
= 0; queue_index
< num_queues
; ++queue_index
) {
243 queue
= &vif
->queues
[queue_index
];
244 disable_irq(queue
->tx_irq
);
245 if (queue
->tx_irq
!= queue
->rx_irq
)
246 disable_irq(queue
->rx_irq
);
247 napi_disable(&queue
->napi
);
248 del_timer_sync(&queue
->credit_timeout
);
252 static int xenvif_open(struct net_device
*dev
)
254 struct xenvif
*vif
= netdev_priv(dev
);
255 if (test_bit(VIF_STATUS_CONNECTED
, &vif
->status
))
257 netif_tx_start_all_queues(dev
);
261 static int xenvif_close(struct net_device
*dev
)
263 struct xenvif
*vif
= netdev_priv(dev
);
264 if (test_bit(VIF_STATUS_CONNECTED
, &vif
->status
))
266 netif_tx_stop_all_queues(dev
);
270 static int xenvif_change_mtu(struct net_device
*dev
, int mtu
)
272 struct xenvif
*vif
= netdev_priv(dev
);
273 int max
= vif
->can_sg
? 65535 - VLAN_ETH_HLEN
: ETH_DATA_LEN
;
281 static netdev_features_t
xenvif_fix_features(struct net_device
*dev
,
282 netdev_features_t features
)
284 struct xenvif
*vif
= netdev_priv(dev
);
287 features
&= ~NETIF_F_SG
;
288 if (~(vif
->gso_mask
| vif
->gso_prefix_mask
) & GSO_BIT(TCPV4
))
289 features
&= ~NETIF_F_TSO
;
290 if (~(vif
->gso_mask
| vif
->gso_prefix_mask
) & GSO_BIT(TCPV6
))
291 features
&= ~NETIF_F_TSO6
;
293 features
&= ~NETIF_F_IP_CSUM
;
295 features
&= ~NETIF_F_IPV6_CSUM
;
300 static const struct xenvif_stat
{
301 char name
[ETH_GSTRING_LEN
];
305 "rx_gso_checksum_fixup",
306 offsetof(struct xenvif_stats
, rx_gso_checksum_fixup
)
308 /* If (sent != success + fail), there are probably packets never
313 offsetof(struct xenvif_stats
, tx_zerocopy_sent
),
316 "tx_zerocopy_success",
317 offsetof(struct xenvif_stats
, tx_zerocopy_success
),
321 offsetof(struct xenvif_stats
, tx_zerocopy_fail
)
323 /* Number of packets exceeding MAX_SKB_FRAG slots. You should use
324 * a guest with the same MAX_SKB_FRAG
328 offsetof(struct xenvif_stats
, tx_frag_overflow
)
332 static int xenvif_get_sset_count(struct net_device
*dev
, int string_set
)
334 switch (string_set
) {
336 return ARRAY_SIZE(xenvif_stats
);
342 static void xenvif_get_ethtool_stats(struct net_device
*dev
,
343 struct ethtool_stats
*stats
, u64
* data
)
345 struct xenvif
*vif
= netdev_priv(dev
);
346 unsigned int num_queues
= vif
->num_queues
;
348 unsigned int queue_index
;
350 for (i
= 0; i
< ARRAY_SIZE(xenvif_stats
); i
++) {
351 unsigned long accum
= 0;
352 for (queue_index
= 0; queue_index
< num_queues
; ++queue_index
) {
353 void *vif_stats
= &vif
->queues
[queue_index
].stats
;
354 accum
+= *(unsigned long *)(vif_stats
+ xenvif_stats
[i
].offset
);
360 static void xenvif_get_strings(struct net_device
*dev
, u32 stringset
, u8
* data
)
366 for (i
= 0; i
< ARRAY_SIZE(xenvif_stats
); i
++)
367 memcpy(data
+ i
* ETH_GSTRING_LEN
,
368 xenvif_stats
[i
].name
, ETH_GSTRING_LEN
);
373 static const struct ethtool_ops xenvif_ethtool_ops
= {
374 .get_link
= ethtool_op_get_link
,
376 .get_sset_count
= xenvif_get_sset_count
,
377 .get_ethtool_stats
= xenvif_get_ethtool_stats
,
378 .get_strings
= xenvif_get_strings
,
381 static const struct net_device_ops xenvif_netdev_ops
= {
382 .ndo_start_xmit
= xenvif_start_xmit
,
383 .ndo_get_stats
= xenvif_get_stats
,
384 .ndo_open
= xenvif_open
,
385 .ndo_stop
= xenvif_close
,
386 .ndo_change_mtu
= xenvif_change_mtu
,
387 .ndo_fix_features
= xenvif_fix_features
,
388 .ndo_set_mac_address
= eth_mac_addr
,
389 .ndo_validate_addr
= eth_validate_addr
,
392 struct xenvif
*xenvif_alloc(struct device
*parent
, domid_t domid
,
396 struct net_device
*dev
;
398 char name
[IFNAMSIZ
] = {};
400 snprintf(name
, IFNAMSIZ
- 1, "vif%u.%u", domid
, handle
);
401 /* Allocate a netdev with the max. supported number of queues.
402 * When the guest selects the desired number, it will be updated
403 * via netif_set_real_num_*_queues().
405 dev
= alloc_netdev_mq(sizeof(struct xenvif
), name
, NET_NAME_UNKNOWN
,
406 ether_setup
, xenvif_max_queues
);
408 pr_warn("Could not allocate netdev for %s\n", name
);
409 return ERR_PTR(-ENOMEM
);
412 SET_NETDEV_DEV(dev
, parent
);
414 vif
= netdev_priv(dev
);
417 vif
->handle
= handle
;
421 vif
->disabled
= false;
422 vif
->drain_timeout
= msecs_to_jiffies(rx_drain_timeout_msecs
);
423 vif
->stall_timeout
= msecs_to_jiffies(rx_stall_timeout_msecs
);
425 /* Start out with no queues. */
429 spin_lock_init(&vif
->lock
);
431 dev
->netdev_ops
= &xenvif_netdev_ops
;
432 dev
->hw_features
= NETIF_F_SG
|
433 NETIF_F_IP_CSUM
| NETIF_F_IPV6_CSUM
|
434 NETIF_F_TSO
| NETIF_F_TSO6
;
435 dev
->features
= dev
->hw_features
| NETIF_F_RXCSUM
;
436 dev
->ethtool_ops
= &xenvif_ethtool_ops
;
438 dev
->tx_queue_len
= XENVIF_QUEUE_LENGTH
;
441 * Initialise a dummy MAC address. We choose the numerically
442 * largest non-broadcast address to prevent the address getting
443 * stolen by an Ethernet bridge for STP purposes.
444 * (FE:FF:FF:FF:FF:FF)
446 eth_broadcast_addr(dev
->dev_addr
);
447 dev
->dev_addr
[0] &= ~0x01;
449 netif_carrier_off(dev
);
451 err
= register_netdev(dev
);
453 netdev_warn(dev
, "Could not register device: err=%d\n", err
);
458 netdev_dbg(dev
, "Successfully created xenvif\n");
460 __module_get(THIS_MODULE
);
465 int xenvif_init_queue(struct xenvif_queue
*queue
)
469 queue
->credit_bytes
= queue
->remaining_credit
= ~0UL;
470 queue
->credit_usec
= 0UL;
471 init_timer(&queue
->credit_timeout
);
472 queue
->credit_timeout
.function
= xenvif_tx_credit_callback
;
473 queue
->credit_window_start
= get_jiffies_64();
475 queue
->rx_queue_max
= XENVIF_RX_QUEUE_BYTES
;
477 skb_queue_head_init(&queue
->rx_queue
);
478 skb_queue_head_init(&queue
->tx_queue
);
480 queue
->pending_cons
= 0;
481 queue
->pending_prod
= MAX_PENDING_REQS
;
482 for (i
= 0; i
< MAX_PENDING_REQS
; ++i
)
483 queue
->pending_ring
[i
] = i
;
485 spin_lock_init(&queue
->callback_lock
);
486 spin_lock_init(&queue
->response_lock
);
488 /* If ballooning is disabled, this will consume real memory, so you
489 * better enable it. The long term solution would be to use just a
490 * bunch of valid page descriptors, without dependency on ballooning
492 err
= gnttab_alloc_pages(MAX_PENDING_REQS
,
495 netdev_err(queue
->vif
->dev
, "Could not reserve mmap_pages\n");
499 for (i
= 0; i
< MAX_PENDING_REQS
; i
++) {
500 queue
->pending_tx_info
[i
].callback_struct
= (struct ubuf_info
)
501 { .callback
= xenvif_zerocopy_callback
,
504 queue
->grant_tx_handle
[i
] = NETBACK_INVALID_HANDLE
;
510 void xenvif_carrier_on(struct xenvif
*vif
)
513 if (!vif
->can_sg
&& vif
->dev
->mtu
> ETH_DATA_LEN
)
514 dev_set_mtu(vif
->dev
, ETH_DATA_LEN
);
515 netdev_update_features(vif
->dev
);
516 set_bit(VIF_STATUS_CONNECTED
, &vif
->status
);
517 if (netif_running(vif
->dev
))
522 int xenvif_connect(struct xenvif_queue
*queue
, unsigned long tx_ring_ref
,
523 unsigned long rx_ring_ref
, unsigned int tx_evtchn
,
524 unsigned int rx_evtchn
)
526 struct task_struct
*task
;
529 BUG_ON(queue
->tx_irq
);
531 BUG_ON(queue
->dealloc_task
);
533 err
= xenvif_map_frontend_rings(queue
, tx_ring_ref
, rx_ring_ref
);
537 init_waitqueue_head(&queue
->wq
);
538 init_waitqueue_head(&queue
->dealloc_wq
);
539 atomic_set(&queue
->inflight_packets
, 0);
541 netif_napi_add(queue
->vif
->dev
, &queue
->napi
, xenvif_poll
,
544 if (tx_evtchn
== rx_evtchn
) {
545 /* feature-split-event-channels == 0 */
546 err
= bind_interdomain_evtchn_to_irqhandler(
547 queue
->vif
->domid
, tx_evtchn
, xenvif_interrupt
, 0,
551 queue
->tx_irq
= queue
->rx_irq
= err
;
552 disable_irq(queue
->tx_irq
);
554 /* feature-split-event-channels == 1 */
555 snprintf(queue
->tx_irq_name
, sizeof(queue
->tx_irq_name
),
556 "%s-tx", queue
->name
);
557 err
= bind_interdomain_evtchn_to_irqhandler(
558 queue
->vif
->domid
, tx_evtchn
, xenvif_tx_interrupt
, 0,
559 queue
->tx_irq_name
, queue
);
563 disable_irq(queue
->tx_irq
);
565 snprintf(queue
->rx_irq_name
, sizeof(queue
->rx_irq_name
),
566 "%s-rx", queue
->name
);
567 err
= bind_interdomain_evtchn_to_irqhandler(
568 queue
->vif
->domid
, rx_evtchn
, xenvif_rx_interrupt
, 0,
569 queue
->rx_irq_name
, queue
);
573 disable_irq(queue
->rx_irq
);
576 queue
->stalled
= true;
578 task
= kthread_create(xenvif_kthread_guest_rx
,
579 (void *)queue
, "%s-guest-rx", queue
->name
);
581 pr_warn("Could not allocate kthread for %s\n", queue
->name
);
586 get_task_struct(task
);
588 task
= kthread_create(xenvif_dealloc_kthread
,
589 (void *)queue
, "%s-dealloc", queue
->name
);
591 pr_warn("Could not allocate kthread for %s\n", queue
->name
);
595 queue
->dealloc_task
= task
;
597 wake_up_process(queue
->task
);
598 wake_up_process(queue
->dealloc_task
);
603 unbind_from_irqhandler(queue
->rx_irq
, queue
);
606 unbind_from_irqhandler(queue
->tx_irq
, queue
);
609 xenvif_unmap_frontend_rings(queue
);
611 module_put(THIS_MODULE
);
615 void xenvif_carrier_off(struct xenvif
*vif
)
617 struct net_device
*dev
= vif
->dev
;
620 if (test_and_clear_bit(VIF_STATUS_CONNECTED
, &vif
->status
)) {
621 netif_carrier_off(dev
); /* discard queued packets */
622 if (netif_running(dev
))
628 void xenvif_disconnect(struct xenvif
*vif
)
630 struct xenvif_queue
*queue
= NULL
;
631 unsigned int num_queues
= vif
->num_queues
;
632 unsigned int queue_index
;
634 xenvif_carrier_off(vif
);
636 for (queue_index
= 0; queue_index
< num_queues
; ++queue_index
) {
637 queue
= &vif
->queues
[queue_index
];
639 netif_napi_del(&queue
->napi
);
642 kthread_stop(queue
->task
);
643 put_task_struct(queue
->task
);
647 if (queue
->dealloc_task
) {
648 kthread_stop(queue
->dealloc_task
);
649 queue
->dealloc_task
= NULL
;
653 if (queue
->tx_irq
== queue
->rx_irq
)
654 unbind_from_irqhandler(queue
->tx_irq
, queue
);
656 unbind_from_irqhandler(queue
->tx_irq
, queue
);
657 unbind_from_irqhandler(queue
->rx_irq
, queue
);
662 xenvif_unmap_frontend_rings(queue
);
666 /* Reverse the relevant parts of xenvif_init_queue().
667 * Used for queue teardown from xenvif_free(), and on the
668 * error handling paths in xenbus.c:connect().
670 void xenvif_deinit_queue(struct xenvif_queue
*queue
)
672 gnttab_free_pages(MAX_PENDING_REQS
, queue
->mmap_pages
);
675 void xenvif_free(struct xenvif
*vif
)
677 struct xenvif_queue
*queue
= NULL
;
678 unsigned int num_queues
= vif
->num_queues
;
679 unsigned int queue_index
;
681 unregister_netdev(vif
->dev
);
683 for (queue_index
= 0; queue_index
< num_queues
; ++queue_index
) {
684 queue
= &vif
->queues
[queue_index
];
685 xenvif_deinit_queue(queue
);
692 free_netdev(vif
->dev
);
694 module_put(THIS_MODULE
);