/*
 * Network-device interface management.
 *
 * Copyright (c) 2004-2005, Keir Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/kthread.h>
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>

#include <xen/events.h>
#include <asm/xen/hypercall.h>
#include <xen/balloon.h>

/* Value stored in the net device's tx_queue_len when a vif is allocated. */
#define XENVIF_QUEUE_LENGTH 32
/* Weight passed to netif_napi_add() for the vif's NAPI instance. */
#define XENVIF_NAPI_WEIGHT  64

46 int xenvif_schedulable(struct xenvif
*vif
)
48 return netif_running(vif
->dev
) && netif_carrier_ok(vif
->dev
);
51 static irqreturn_t
xenvif_tx_interrupt(int irq
, void *dev_id
)
53 struct xenvif
*vif
= dev_id
;
55 if (RING_HAS_UNCONSUMED_REQUESTS(&vif
->tx
))
56 napi_schedule(&vif
->napi
);
61 static int xenvif_poll(struct napi_struct
*napi
, int budget
)
63 struct xenvif
*vif
= container_of(napi
, struct xenvif
, napi
);
66 /* This vif is rogue, we pretend we've there is nothing to do
67 * for this vif to deschedule it from NAPI. But this interface
68 * will be turned off in thread context later.
70 if (unlikely(vif
->disabled
)) {
75 work_done
= xenvif_tx_action(vif
, budget
);
77 if (work_done
< budget
) {
81 /* It is necessary to disable IRQ before calling
82 * RING_HAS_UNCONSUMED_REQUESTS. Otherwise we might
83 * lose event from the frontend.
86 * RING_HAS_UNCONSUMED_REQUESTS
87 * <frontend generates event to trigger napi_schedule>
90 * This handler is still in scheduled state so the
91 * event has no effect at all. After __napi_complete
92 * this handler is descheduled and cannot get
93 * scheduled again. We lose event in this case and the ring
94 * will be completely stalled.
97 local_irq_save(flags
);
99 RING_FINAL_CHECK_FOR_REQUESTS(&vif
->tx
, more_to_do
);
101 __napi_complete(napi
);
103 local_irq_restore(flags
);
109 static irqreturn_t
xenvif_rx_interrupt(int irq
, void *dev_id
)
111 struct xenvif
*vif
= dev_id
;
113 xenvif_kick_thread(vif
);
118 static irqreturn_t
xenvif_interrupt(int irq
, void *dev_id
)
120 xenvif_tx_interrupt(irq
, dev_id
);
121 xenvif_rx_interrupt(irq
, dev_id
);
126 static void xenvif_wake_queue(unsigned long data
)
128 struct xenvif
*vif
= (struct xenvif
*)data
;
130 if (netif_queue_stopped(vif
->dev
)) {
131 netdev_err(vif
->dev
, "draining TX queue\n");
132 vif
->rx_queue_purge
= true;
133 xenvif_kick_thread(vif
);
134 netif_wake_queue(vif
->dev
);
138 static int xenvif_start_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
140 struct xenvif
*vif
= netdev_priv(dev
);
141 int min_slots_needed
;
143 BUG_ON(skb
->dev
!= dev
);
145 /* Drop the packet if vif is not ready */
146 if (vif
->task
== NULL
||
147 vif
->dealloc_task
== NULL
||
148 !xenvif_schedulable(vif
))
151 /* At best we'll need one slot for the header and one for each
154 min_slots_needed
= 1 + skb_shinfo(skb
)->nr_frags
;
156 /* If the skb is GSO then we'll also need an extra slot for the
162 /* If the skb can't possibly fit in the remaining slots
163 * then turn off the queue to give the ring a chance to
166 if (!xenvif_rx_ring_slots_available(vif
, min_slots_needed
)) {
167 vif
->wake_queue
.function
= xenvif_wake_queue
;
168 vif
->wake_queue
.data
= (unsigned long)vif
;
169 xenvif_stop_queue(vif
);
170 mod_timer(&vif
->wake_queue
,
171 jiffies
+ rx_drain_timeout_jiffies
);
174 skb_queue_tail(&vif
->rx_queue
, skb
);
175 xenvif_kick_thread(vif
);
180 vif
->dev
->stats
.tx_dropped
++;
185 static struct net_device_stats
*xenvif_get_stats(struct net_device
*dev
)
187 struct xenvif
*vif
= netdev_priv(dev
);
188 return &vif
->dev
->stats
;
191 static void xenvif_up(struct xenvif
*vif
)
193 napi_enable(&vif
->napi
);
194 enable_irq(vif
->tx_irq
);
195 if (vif
->tx_irq
!= vif
->rx_irq
)
196 enable_irq(vif
->rx_irq
);
197 xenvif_check_rx_xenvif(vif
);
200 static void xenvif_down(struct xenvif
*vif
)
202 napi_disable(&vif
->napi
);
203 disable_irq(vif
->tx_irq
);
204 if (vif
->tx_irq
!= vif
->rx_irq
)
205 disable_irq(vif
->rx_irq
);
206 del_timer_sync(&vif
->credit_timeout
);
/* ndo_open handler: bring the vif up when the carrier is already on, then
 * start the device transmit queue.
 *
 * NOTE(review): the xenvif_up() call under the carrier test and the
 * "return 0" were missing from the damaged listing and are restored here -
 * confirm against the upstream file.
 */
static int xenvif_open(struct net_device *dev)
{
	struct xenvif *vif = netdev_priv(dev);
	if (netif_carrier_ok(dev))
		xenvif_up(vif);
	netif_start_queue(dev);
	return 0;
}

/* ndo_stop handler: take the vif down when the carrier is on, then stop
 * the device transmit queue.
 *
 * NOTE(review): the xenvif_down() call under the carrier test and the
 * "return 0" were missing from the damaged listing and are restored here -
 * confirm against the upstream file.
 */
static int xenvif_close(struct net_device *dev)
{
	struct xenvif *vif = netdev_priv(dev);
	if (netif_carrier_ok(dev))
		xenvif_down(vif);
	netif_stop_queue(dev);
	return 0;
}

227 static int xenvif_change_mtu(struct net_device
*dev
, int mtu
)
229 struct xenvif
*vif
= netdev_priv(dev
);
230 int max
= vif
->can_sg
? 65535 - VLAN_ETH_HLEN
: ETH_DATA_LEN
;
238 static netdev_features_t
xenvif_fix_features(struct net_device
*dev
,
239 netdev_features_t features
)
241 struct xenvif
*vif
= netdev_priv(dev
);
244 features
&= ~NETIF_F_SG
;
245 if (~(vif
->gso_mask
| vif
->gso_prefix_mask
) & GSO_BIT(TCPV4
))
246 features
&= ~NETIF_F_TSO
;
247 if (~(vif
->gso_mask
| vif
->gso_prefix_mask
) & GSO_BIT(TCPV6
))
248 features
&= ~NETIF_F_TSO6
;
250 features
&= ~NETIF_F_IP_CSUM
;
252 features
&= ~NETIF_F_IPV6_CSUM
;
257 static const struct xenvif_stat
{
258 char name
[ETH_GSTRING_LEN
];
262 "rx_gso_checksum_fixup",
263 offsetof(struct xenvif
, rx_gso_checksum_fixup
)
265 /* If (sent != success + fail), there are probably packets never
270 offsetof(struct xenvif
, tx_zerocopy_sent
),
273 "tx_zerocopy_success",
274 offsetof(struct xenvif
, tx_zerocopy_success
),
278 offsetof(struct xenvif
, tx_zerocopy_fail
)
280 /* Number of packets exceeding MAX_SKB_FRAG slots. You should use
281 * a guest with the same MAX_SKB_FRAG
285 offsetof(struct xenvif
, tx_frag_overflow
)
289 static int xenvif_get_sset_count(struct net_device
*dev
, int string_set
)
291 switch (string_set
) {
293 return ARRAY_SIZE(xenvif_stats
);
299 static void xenvif_get_ethtool_stats(struct net_device
*dev
,
300 struct ethtool_stats
*stats
, u64
* data
)
302 void *vif
= netdev_priv(dev
);
305 for (i
= 0; i
< ARRAY_SIZE(xenvif_stats
); i
++)
306 data
[i
] = *(unsigned long *)(vif
+ xenvif_stats
[i
].offset
);
309 static void xenvif_get_strings(struct net_device
*dev
, u32 stringset
, u8
* data
)
315 for (i
= 0; i
< ARRAY_SIZE(xenvif_stats
); i
++)
316 memcpy(data
+ i
* ETH_GSTRING_LEN
,
317 xenvif_stats
[i
].name
, ETH_GSTRING_LEN
);
322 static const struct ethtool_ops xenvif_ethtool_ops
= {
323 .get_link
= ethtool_op_get_link
,
325 .get_sset_count
= xenvif_get_sset_count
,
326 .get_ethtool_stats
= xenvif_get_ethtool_stats
,
327 .get_strings
= xenvif_get_strings
,
330 static const struct net_device_ops xenvif_netdev_ops
= {
331 .ndo_start_xmit
= xenvif_start_xmit
,
332 .ndo_get_stats
= xenvif_get_stats
,
333 .ndo_open
= xenvif_open
,
334 .ndo_stop
= xenvif_close
,
335 .ndo_change_mtu
= xenvif_change_mtu
,
336 .ndo_fix_features
= xenvif_fix_features
,
337 .ndo_set_mac_address
= eth_mac_addr
,
338 .ndo_validate_addr
= eth_validate_addr
,
341 struct xenvif
*xenvif_alloc(struct device
*parent
, domid_t domid
,
345 struct net_device
*dev
;
347 char name
[IFNAMSIZ
] = {};
350 snprintf(name
, IFNAMSIZ
- 1, "vif%u.%u", domid
, handle
);
351 dev
= alloc_netdev(sizeof(struct xenvif
), name
, ether_setup
);
353 pr_warn("Could not allocate netdev for %s\n", name
);
354 return ERR_PTR(-ENOMEM
);
357 SET_NETDEV_DEV(dev
, parent
);
359 vif
= netdev_priv(dev
);
361 vif
->grant_copy_op
= vmalloc(sizeof(struct gnttab_copy
) *
363 if (vif
->grant_copy_op
== NULL
) {
364 pr_warn("Could not allocate grant copy space for %s\n", name
);
366 return ERR_PTR(-ENOMEM
);
370 vif
->handle
= handle
;
375 vif
->disabled
= false;
377 vif
->credit_bytes
= vif
->remaining_credit
= ~0UL;
378 vif
->credit_usec
= 0UL;
379 init_timer(&vif
->credit_timeout
);
380 vif
->credit_window_start
= get_jiffies_64();
382 init_timer(&vif
->wake_queue
);
384 dev
->netdev_ops
= &xenvif_netdev_ops
;
385 dev
->hw_features
= NETIF_F_SG
|
386 NETIF_F_IP_CSUM
| NETIF_F_IPV6_CSUM
|
387 NETIF_F_TSO
| NETIF_F_TSO6
;
388 dev
->features
= dev
->hw_features
| NETIF_F_RXCSUM
;
389 dev
->ethtool_ops
= &xenvif_ethtool_ops
;
391 dev
->tx_queue_len
= XENVIF_QUEUE_LENGTH
;
393 skb_queue_head_init(&vif
->rx_queue
);
394 skb_queue_head_init(&vif
->tx_queue
);
396 vif
->pending_cons
= 0;
397 vif
->pending_prod
= MAX_PENDING_REQS
;
398 for (i
= 0; i
< MAX_PENDING_REQS
; i
++)
399 vif
->pending_ring
[i
] = i
;
400 spin_lock_init(&vif
->callback_lock
);
401 spin_lock_init(&vif
->response_lock
);
402 /* If ballooning is disabled, this will consume real memory, so you
403 * better enable it. The long term solution would be to use just a
404 * bunch of valid page descriptors, without dependency on ballooning
406 err
= alloc_xenballooned_pages(MAX_PENDING_REQS
,
410 netdev_err(dev
, "Could not reserve mmap_pages\n");
411 return ERR_PTR(-ENOMEM
);
413 for (i
= 0; i
< MAX_PENDING_REQS
; i
++) {
414 vif
->pending_tx_info
[i
].callback_struct
= (struct ubuf_info
)
415 { .callback
= xenvif_zerocopy_callback
,
418 vif
->grant_tx_handle
[i
] = NETBACK_INVALID_HANDLE
;
422 * Initialise a dummy MAC address. We choose the numerically
423 * largest non-broadcast address to prevent the address getting
424 * stolen by an Ethernet bridge for STP purposes.
425 * (FE:FF:FF:FF:FF:FF)
427 memset(dev
->dev_addr
, 0xFF, ETH_ALEN
);
428 dev
->dev_addr
[0] &= ~0x01;
430 netif_napi_add(dev
, &vif
->napi
, xenvif_poll
, XENVIF_NAPI_WEIGHT
);
432 netif_carrier_off(dev
);
434 err
= register_netdev(dev
);
436 netdev_warn(dev
, "Could not register device: err=%d\n", err
);
441 netdev_dbg(dev
, "Successfully created xenvif\n");
443 __module_get(THIS_MODULE
);
448 int xenvif_connect(struct xenvif
*vif
, unsigned long tx_ring_ref
,
449 unsigned long rx_ring_ref
, unsigned int tx_evtchn
,
450 unsigned int rx_evtchn
)
452 struct task_struct
*task
;
457 BUG_ON(vif
->dealloc_task
);
459 err
= xenvif_map_frontend_rings(vif
, tx_ring_ref
, rx_ring_ref
);
463 init_waitqueue_head(&vif
->wq
);
464 init_waitqueue_head(&vif
->dealloc_wq
);
466 if (tx_evtchn
== rx_evtchn
) {
467 /* feature-split-event-channels == 0 */
468 err
= bind_interdomain_evtchn_to_irqhandler(
469 vif
->domid
, tx_evtchn
, xenvif_interrupt
, 0,
470 vif
->dev
->name
, vif
);
473 vif
->tx_irq
= vif
->rx_irq
= err
;
474 disable_irq(vif
->tx_irq
);
476 /* feature-split-event-channels == 1 */
477 snprintf(vif
->tx_irq_name
, sizeof(vif
->tx_irq_name
),
478 "%s-tx", vif
->dev
->name
);
479 err
= bind_interdomain_evtchn_to_irqhandler(
480 vif
->domid
, tx_evtchn
, xenvif_tx_interrupt
, 0,
481 vif
->tx_irq_name
, vif
);
485 disable_irq(vif
->tx_irq
);
487 snprintf(vif
->rx_irq_name
, sizeof(vif
->rx_irq_name
),
488 "%s-rx", vif
->dev
->name
);
489 err
= bind_interdomain_evtchn_to_irqhandler(
490 vif
->domid
, rx_evtchn
, xenvif_rx_interrupt
, 0,
491 vif
->rx_irq_name
, vif
);
495 disable_irq(vif
->rx_irq
);
498 task
= kthread_create(xenvif_kthread_guest_rx
,
499 (void *)vif
, "%s-guest-rx", vif
->dev
->name
);
501 pr_warn("Could not allocate kthread for %s\n", vif
->dev
->name
);
508 task
= kthread_create(xenvif_dealloc_kthread
,
509 (void *)vif
, "%s-dealloc", vif
->dev
->name
);
511 pr_warn("Could not allocate kthread for %s\n", vif
->dev
->name
);
516 vif
->dealloc_task
= task
;
519 if (!vif
->can_sg
&& vif
->dev
->mtu
> ETH_DATA_LEN
)
520 dev_set_mtu(vif
->dev
, ETH_DATA_LEN
);
521 netdev_update_features(vif
->dev
);
522 netif_carrier_on(vif
->dev
);
523 if (netif_running(vif
->dev
))
527 wake_up_process(vif
->task
);
528 wake_up_process(vif
->dealloc_task
);
533 unbind_from_irqhandler(vif
->rx_irq
, vif
);
536 unbind_from_irqhandler(vif
->tx_irq
, vif
);
539 xenvif_unmap_frontend_rings(vif
);
541 module_put(THIS_MODULE
);
545 void xenvif_carrier_off(struct xenvif
*vif
)
547 struct net_device
*dev
= vif
->dev
;
550 netif_carrier_off(dev
); /* discard queued packets */
551 if (netif_running(dev
))
556 void xenvif_disconnect(struct xenvif
*vif
)
558 if (netif_carrier_ok(vif
->dev
))
559 xenvif_carrier_off(vif
);
562 del_timer_sync(&vif
->wake_queue
);
563 kthread_stop(vif
->task
);
567 if (vif
->dealloc_task
) {
568 kthread_stop(vif
->dealloc_task
);
569 vif
->dealloc_task
= NULL
;
573 if (vif
->tx_irq
== vif
->rx_irq
)
574 unbind_from_irqhandler(vif
->tx_irq
, vif
);
576 unbind_from_irqhandler(vif
->tx_irq
, vif
);
577 unbind_from_irqhandler(vif
->rx_irq
, vif
);
582 xenvif_unmap_frontend_rings(vif
);
585 void xenvif_free(struct xenvif
*vif
)
587 int i
, unmap_timeout
= 0;
588 /* Here we want to avoid timeout messages if an skb can be legitimately
589 * stuck somewhere else. Realistically this could be an another vif's
590 * internal or QDisc queue. That another vif also has this
591 * rx_drain_timeout_msecs timeout, but the timer only ditches the
592 * internal queue. After that, the QDisc queue can put in worst case
593 * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that another vif's
594 * internal queue, so we need several rounds of such timeouts until we
595 * can be sure that no another vif should have skb's from us. We are
596 * not sending more skb's, so newly stuck packets are not interesting
599 unsigned int worst_case_skb_lifetime
= (rx_drain_timeout_msecs
/1000) *
600 DIV_ROUND_UP(XENVIF_QUEUE_LENGTH
, (XEN_NETIF_RX_RING_SIZE
/ MAX_SKB_FRAGS
));
602 for (i
= 0; i
< MAX_PENDING_REQS
; ++i
) {
603 if (vif
->grant_tx_handle
[i
] != NETBACK_INVALID_HANDLE
) {
605 schedule_timeout(msecs_to_jiffies(1000));
606 if (unmap_timeout
> worst_case_skb_lifetime
&&
609 "Page still granted! Index: %x\n",
611 /* If there are still unmapped pages, reset the loop to
612 * start checking again. We shouldn't exit here until
613 * dealloc thread and NAPI instance release all the
614 * pages. If a kernel bug causes the skbs to stall
615 * somewhere, the interface cannot be brought down
622 free_xenballooned_pages(MAX_PENDING_REQS
, vif
->mmap_pages
);
624 netif_napi_del(&vif
->napi
);
626 unregister_netdev(vif
->dev
);
628 vfree(vif
->grant_copy_op
);
629 free_netdev(vif
->dev
);
631 module_put(THIS_MODULE
);