* Device drivers call our routines to queue packets here. We empty the
* queue in the local softnet handler.
*/
-DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
+
+DEFINE_PER_CPU(struct softnet_data, softnet_data);
#ifdef CONFIG_SYSFS
extern int netdev_sysfs_init(void);
*/
int dev_change_name(struct net_device *dev, char *newname)
{
+ char oldname[IFNAMSIZ];
int err = 0;
+ int ret;
ASSERT_RTNL();
if (!dev_valid_name(newname))
return -EINVAL;
+ memcpy(oldname, dev->name, IFNAMSIZ);
+
if (strchr(newname, '%')) {
err = dev_alloc_name(dev, newname);
if (err < 0)
			return err;
		strcpy(newname, dev->name);
	}
	else if (__dev_get_by_name(newname))
		return -EEXIST;
else
strlcpy(dev->name, newname, IFNAMSIZ);
+rollback:
device_rename(&dev->dev, dev->name);
+
+ write_lock_bh(&dev_base_lock);
hlist_del(&dev->name_hlist);
hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
- raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ write_unlock_bh(&dev_base_lock);
+
+ ret = raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ ret = notifier_to_errno(ret);
+
+ if (ret) {
+ if (err) {
+ printk(KERN_ERR
+ "%s: name change rollback failed: %d.\n",
+ dev->name, ret);
+ } else {
+ err = ret;
+ memcpy(dev->name, oldname, IFNAMSIZ);
+ goto rollback;
+ }
+ }
return err;
}
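/* A minimal sketch of what the rollback above enables; it is not part
 * of this patch. example_netdev_event() and some_subsystem_busy() are
 * hypothetical. A notifier can now veto a rename by returning
 * notifier_from_errno(-EBUSY), which makes dev_change_name() restore
 * oldname via the rollback label.
 */
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_CHANGENAME && some_subsystem_busy(dev))
		return notifier_from_errno(-EBUSY);

	return NOTIFY_DONE;
}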
clear_bit(__LINK_STATE_START, &dev->state);
/* Synchronize to scheduled poll. We cannot touch poll list,
- * it can be even on different cpu. So just clear netif_running(),
- * and wait when poll really will happen. Actually, the best place
- * for this is inside dev->stop() after device stopped its irq
- * engine, but this requires more changes in devices. */
-
+ * it can even be on a different cpu. So just clear netif_running().
+ *
+ * dev->stop() will invoke napi_disable() on all of its
+ * napi_struct instances on this device.
+ */
smp_mb__after_clear_bit(); /* Commit netif_running(). */
- while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
- /* No hurry. */
- msleep(1);
- }
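/* A minimal sketch of the new quiescing contract, using a hypothetical
 * driver: instead of dev_close() spinning on __LINK_STATE_RX_SCHED,
 * the driver's own ->stop() disables each of its napi_struct
 * instances. example_priv and example_hw_silence_irqs() are assumed.
 */
static int example_stop(struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	napi_disable(&priv->napi);	/* waits for a running poll to finish */
	example_hw_silence_irqs(priv);	/* hypothetical: stop the irq engine */
	return 0;
}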
/*
* Call the device specific close. This cannot fail.
int register_netdevice_notifier(struct notifier_block *nb)
{
struct net_device *dev;
+ struct net_device *last;
int err;
rtnl_lock();
err = raw_notifier_chain_register(&netdev_chain, nb);
- if (!err) {
- for_each_netdev(dev) {
- nb->notifier_call(nb, NETDEV_REGISTER, dev);
+ if (err)
+ goto unlock;
- if (dev->flags & IFF_UP)
- nb->notifier_call(nb, NETDEV_UP, dev);
- }
+ for_each_netdev(dev) {
+ err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
+ err = notifier_to_errno(err);
+ if (err)
+ goto rollback;
+
+ if (!(dev->flags & IFF_UP))
+ continue;
+
+ nb->notifier_call(nb, NETDEV_UP, dev);
}
+
+unlock:
rtnl_unlock();
return err;
+
+rollback:
+ last = dev;
+ for_each_netdev(dev) {
+ if (dev == last)
+ break;
+
+ if (dev->flags & IFF_UP) {
+ nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
+ nb->notifier_call(nb, NETDEV_DOWN, dev);
+ }
+ nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+ }
+ goto unlock;
}
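/* A sketch of a notifier exercising the rollback path above; the
 * helpers are hypothetical. If NETDEV_REGISTER fails for some device,
 * register_netdevice_notifier() returns that error after replaying
 * DOWN/UNREGISTER for every device it had already announced.
 */
static int example_event(struct notifier_block *nb,
			 unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_REGISTER && !example_alloc_state(dev))
		return notifier_from_errno(-ENOMEM);
	return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
	.notifier_call = example_event,
};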
/**
}
EXPORT_SYMBOL(__netif_schedule);
-void __netif_rx_schedule(struct net_device *dev)
+void dev_kfree_skb_irq(struct sk_buff *skb)
{
- unsigned long flags;
+ if (atomic_dec_and_test(&skb->users)) {
+ struct softnet_data *sd;
+ unsigned long flags;
- local_irq_save(flags);
- dev_hold(dev);
- list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
- if (dev->quota < 0)
- dev->quota += dev->weight;
- else
- dev->quota = dev->weight;
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
- local_irq_restore(flags);
+ local_irq_save(flags);
+ sd = &__get_cpu_var(softnet_data);
+ skb->next = sd->completion_queue;
+ sd->completion_queue = skb;
+ raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ local_irq_restore(flags);
+ }
}
-EXPORT_SYMBOL(__netif_rx_schedule);
+EXPORT_SYMBOL(dev_kfree_skb_irq);
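/* Typical caller, sketched with a hypothetical driver: a TX-completion
 * handler runs in hard-irq context, where dev_kfree_skb() must not be
 * used, so finished skbs are chained onto the per-cpu completion_queue
 * above and actually freed later in net_tx_action().
 */
static irqreturn_t example_tx_irq(int irq, void *dev_id)
{
	struct example_priv *priv = dev_id;
	struct sk_buff *skb;

	while ((skb = example_reap_done_tx(priv)) != NULL)	/* hypothetical */
		dev_kfree_skb_irq(skb);
	return IRQ_HANDLED;
}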
void dev_kfree_skb_any(struct sk_buff *skb)
{
EXPORT_SYMBOL(dev_kfree_skb_any);
-/* Hot-plugging. */
+/**
+ * netif_device_detach - mark device as removed
+ * @dev: network device
+ *
+ * Mark device as removed from system and therefore no longer available.
+ */
void netif_device_detach(struct net_device *dev)
{
if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
}
EXPORT_SYMBOL(netif_device_detach);
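/* The usual pairing, sketched for a hypothetical PCI driver: detach on
 * suspend so the stack stops using the device, attach on resume to
 * restart the queue and watchdog if the interface was running.
 */
static int example_suspend(struct pci_dev *pdev, pm_message_t state)
{
	netif_device_detach(pci_get_drvdata(pdev));
	return 0;
}

static int example_resume(struct pci_dev *pdev)
{
	netif_device_attach(pci_get_drvdata(pdev));
	return 0;
}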
+/**
+ * netif_device_attach - mark device as attached
+ * @dev: network device
+ *
+ * Mark device as attached to the system and restart it if needed.
+ */
void netif_device_attach(struct net_device *dev)
{
if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
return NET_RX_SUCCESS;
}
- netif_rx_schedule(&queue->backlog_dev);
+ napi_schedule(&queue->backlog);
goto enqueue;
}
return dev;
}
+
static void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
__be16 type;
/* if we've gotten here through NAPI, check netpoll */
- if (skb->dev->poll && netpoll_rx(skb))
+ if (netpoll_receive_skb(skb))
return NET_RX_DROP;
if (!skb->tstamp.tv64)
return ret;
}
-static int process_backlog(struct net_device *backlog_dev, int *budget)
+static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
- int quota = min(backlog_dev->quota, *budget);
struct softnet_data *queue = &__get_cpu_var(softnet_data);
unsigned long start_time = jiffies;
- backlog_dev->weight = weight_p;
- for (;;) {
+ napi->weight = weight_p;
+ do {
struct sk_buff *skb;
struct net_device *dev;
local_irq_disable();
skb = __skb_dequeue(&queue->input_pkt_queue);
- if (!skb)
- goto job_done;
+ if (!skb) {
+ __napi_complete(napi);
+ local_irq_enable();
+ break;
+ }
+
local_irq_enable();
dev = skb->dev;
netif_receive_skb(skb);
dev_put(dev);
+ } while (++work < quota && jiffies == start_time);
- work++;
-
- if (work >= quota || jiffies - start_time > 1)
- break;
-
- }
-
- backlog_dev->quota -= work;
- *budget -= work;
- return -1;
-
-job_done:
- backlog_dev->quota -= work;
- *budget -= work;
+ return work;
+}
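/* process_backlog() now follows the same ->poll() contract as any NAPI
 * driver. A hypothetical driver-side counterpart, for comparison: do
 * at most 'budget' worth of work, and complete only after a partial
 * round. example_priv and the rx helpers are assumed names.
 */
static int example_poll(struct napi_struct *napi, int budget)
{
	struct example_priv *priv =
		container_of(napi, struct example_priv, napi);
	int work;

	work = example_rx_clean(priv, budget);	/* hypothetical rx reaping */
	if (work < budget) {
		napi_complete(napi);
		example_enable_rx_irq(priv);	/* hypothetical */
	}
	return work;
}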
- list_del(&backlog_dev->poll_list);
- smp_mb__before_clear_bit();
- netif_poll_enable(backlog_dev);
+/**
+ * __napi_schedule - schedule for receive
+ * @napi: entry to schedule
+ *
+ * The entry's receive function will be scheduled to run
+ */
+void fastcall __napi_schedule(struct napi_struct *n)
+{
+ unsigned long flags;
- local_irq_enable();
- return 0;
+ local_irq_save(flags);
+ list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ local_irq_restore(flags);
}
+EXPORT_SYMBOL(__napi_schedule);
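/* The matching interrupt-side sketch, again with a hypothetical
 * driver: the old per-device netif_rx_schedule() becomes a
 * per-instance schedule that lands in __napi_schedule() above.
 */
static irqreturn_t example_rx_irq(int irq, void *dev_id)
{
	struct example_priv *priv = dev_id;

	if (napi_schedule_prep(&priv->napi)) {
		example_disable_rx_irq(priv);	/* hypothetical */
		__napi_schedule(&priv->napi);
	}
	return IRQ_HANDLED;
}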
+
static void net_rx_action(struct softirq_action *h)
{
- struct softnet_data *queue = &__get_cpu_var(softnet_data);
+ struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
unsigned long start_time = jiffies;
int budget = netdev_budget;
void *have;
local_irq_disable();
- while (!list_empty(&queue->poll_list)) {
- struct net_device *dev;
+ while (!list_empty(list)) {
+ struct napi_struct *n;
+ int work, weight;
- if (budget <= 0 || jiffies - start_time > 1)
+ /* If the softirq window is exhausted then punt.
+ *
+ * Note that this is a slight policy change from the
+ * previous NAPI code, which would allow up to 2
+ * jiffies to pass before breaking out. The test
+ * used to be "jiffies - start_time > 1".
+ */
+ if (unlikely(budget <= 0 || jiffies != start_time))
goto softnet_break;
local_irq_enable();
- dev = list_entry(queue->poll_list.next,
- struct net_device, poll_list);
- have = netpoll_poll_lock(dev);
+ /* Even though interrupts have been re-enabled, this
+ * access is safe because interrupts can only add new
+ * entries to the tail of this list, and only ->poll()
+ * calls can remove this head entry from the list.
+ */
+ n = list_entry(list->next, struct napi_struct, poll_list);
- if (dev->quota <= 0 || dev->poll(dev, &budget)) {
- netpoll_poll_unlock(have);
- local_irq_disable();
- list_move_tail(&dev->poll_list, &queue->poll_list);
- if (dev->quota < 0)
- dev->quota += dev->weight;
- else
- dev->quota = dev->weight;
- } else {
- netpoll_poll_unlock(have);
- dev_put(dev);
- local_irq_disable();
- }
+ have = netpoll_poll_lock(n);
+
+ weight = n->weight;
+
+ work = n->poll(n, weight);
+
+ WARN_ON_ONCE(work > weight);
+
+ budget -= work;
+
+ local_irq_disable();
+
+ /* Drivers must not modify the NAPI state if they
+ * consume the entire weight. In such cases this code
+ * still "owns" the NAPI instance and therefore can
+ * move the instance around on the list at-will.
+ */
+ if (unlikely(work == weight))
+ list_move_tail(&n->poll_list, list);
+
+ netpoll_poll_unlock(have);
}
out:
local_irq_enable();
+
#ifdef CONFIG_NET_DMA
/*
* There may not be any more sk_buffs coming right now, so push
}
}
#endif
+
return;
softnet_break:
return;
if (!netif_device_present(dev))
- return;
+ return;
if (dev->set_rx_mode)
dev->set_rx_mode(dev);
return 0;
}
-static void __dev_addr_discard(struct dev_addr_list **list)
-{
- struct dev_addr_list *tmp;
-
- while (*list != NULL) {
- tmp = *list;
- *list = tmp->next;
- if (tmp->da_users > tmp->da_gusers)
- printk("__dev_addr_discard: address leakage! "
- "da_users=%d\n", tmp->da_users);
- kfree(tmp);
- }
-}
-
/**
* dev_unicast_delete - Release secondary unicast address.
* @dev: device
+ * @addr: address to delete
+ * @alen: length of @addr
*
* Release reference to a secondary unicast address and remove it
- * from the device if the reference count drop to zero.
+ * from the device if the reference count drops to zero.
*
* The caller must hold the rtnl_mutex.
*/
/**
* dev_unicast_add - add a secondary unicast address
* @dev: device
+ * @addr: address to add
+ * @alen: length of @addr
*
* Add a secondary unicast address to the device or increase
* the reference count if it already exists.
}
EXPORT_SYMBOL(dev_unicast_add);
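/* A short usage sketch; the address and calling function are
 * hypothetical. Both calls take the rtnl_mutex as required, and the
 * entry is refcounted, so paired add/delete calls balance out.
 */
static int example_listen_secondary(struct net_device *dev)
{
	static u8 addr[ETH_ALEN] = { 0x02, 0x00, 0x00, 0xaa, 0xbb, 0xcc };
	int err;

	rtnl_lock();
	err = dev_unicast_add(dev, addr, ETH_ALEN);
	rtnl_unlock();
	return err;
}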
-static void dev_unicast_discard(struct net_device *dev)
+static void __dev_addr_discard(struct dev_addr_list **list)
{
- netif_tx_lock_bh(dev);
- __dev_addr_discard(&dev->uc_list);
- dev->uc_count = 0;
- netif_tx_unlock_bh(dev);
-}
+ struct dev_addr_list *tmp;
-/*
- * Discard multicast list when a device is downed
- */
+ while (*list != NULL) {
+ tmp = *list;
+ *list = tmp->next;
+ if (tmp->da_users > tmp->da_gusers)
+ printk("__dev_addr_discard: address leakage! "
+ "da_users=%d\n", tmp->da_users);
+ kfree(tmp);
+ }
+}
-static void dev_mc_discard(struct net_device *dev)
+static void dev_addr_discard(struct net_device *dev)
{
netif_tx_lock_bh(dev);
+
+ __dev_addr_discard(&dev->uc_list);
+ dev->uc_count = 0;
+
__dev_addr_discard(&dev->mc_list);
dev->mc_count = 0;
+
netif_tx_unlock_bh(dev);
}
if (!dev_valid_name(dev->name)) {
ret = -EINVAL;
- goto out;
+ goto err_uninit;
}
dev->ifindex = dev_new_index();
= hlist_entry(p, struct net_device, name_hlist);
if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
ret = -EEXIST;
- goto out;
+ goto err_uninit;
}
}
ret = netdev_register_sysfs(dev);
if (ret)
- goto out;
+ goto err_uninit;
dev->reg_state = NETREG_REGISTERED;
/*
write_unlock_bh(&dev_base_lock);
/* Notify protocols, that a new device appeared. */
- raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
-
- ret = 0;
+ ret = raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ unregister_netdevice(dev);
out:
return ret;
+
+err_uninit:
+ if (dev->uninit)
+ dev->uninit(dev);
+ goto out;
}
/**
/* ensure 32-byte alignment of both the device and private area */
alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
- (sizeof(struct net_device_subqueue) * queue_count)) &
+ (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
~NETDEV_ALIGN_CONST;
alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
dev->priv = ((char *)dev +
((sizeof(struct net_device) +
(sizeof(struct net_device_subqueue) *
- queue_count) + NETDEV_ALIGN_CONST)
+ (queue_count - 1)) + NETDEV_ALIGN_CONST)
& ~NETDEV_ALIGN_CONST));
}
dev->egress_subqueue_count = queue_count;
dev->get_stats = internal_stats;
+ netpoll_netdev_init(dev);
setup(dev);
strcpy(dev->name, name);
return dev;
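/* Worked example of the sizing above, assuming struct net_device ends
 * with a one-element egress_subqueue[] array: the first subqueue lives
 * inside struct net_device itself, so queue_count == 4 needs only
 * three extra entries:
 *
 *	alloc_size = (sizeof(struct net_device) + NETDEV_ALIGN_CONST +
 *		      3 * sizeof(struct net_device_subqueue)) &
 *		     ~NETDEV_ALIGN_CONST;
 *
 * before the aligned private area is appended.
 */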
/*
* Flush the unicast and multicast chains
*/
- dev_unicast_discard(dev);
- dev_mc_discard(dev);
+ dev_addr_discard(dev);
if (dev->uninit)
dev->uninit(dev);
#ifdef CONFIG_NET_DMA
/**
- * net_dma_rebalance -
- * This is called when the number of channels allocated to the net_dma_client
- * changes. The net_dma_client tries to have one DMA channel per CPU.
+ * net_dma_rebalance - try to maintain one DMA channel per CPU
+ * @net_dma: DMA client and associated data (lock, channels, channel_mask)
+ *
+ * This is called when the number of channels allocated to the net_dma client
+ * changes. The net_dma client tries to have one DMA channel per CPU.
*/
static void net_dma_rebalance(struct net_dma *net_dma)
* netdev_dma_event - event callback for the net_dma_client
* @client: should always be net_dma_client
* @chan: DMA channel for the event
- * @event: event type
+ * @state: DMA state to be handled
*/
static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
static int __init netdev_dma_register(void) { return -ENODEV; }
#endif /* CONFIG_NET_DMA */
+/**
+ * netdev_compute_features - compute conjunction of two feature sets
+ * @all: first feature set
+ * @one: second feature set
+ *
+ * Computes a new feature set after adding a device with feature set
+ * @one to the master device with current feature set @all. Returns
+ * the new feature set.
+ */
+int netdev_compute_features(unsigned long all, unsigned long one)
+{
+ /* if device needs checksumming, downgrade to hw checksumming */
+ if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
+ all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
+
+ /* if device can't do all checksum, downgrade to ipv4/ipv6 */
+ if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
+ all ^= NETIF_F_HW_CSUM
+ | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+
+ if (one & NETIF_F_GSO)
+ one |= NETIF_F_GSO_SOFTWARE;
+ one |= NETIF_F_GSO;
+
+ /* If even one device supports robust GSO, enable it for all. */
+ if (one & NETIF_F_GSO_ROBUST)
+ all |= NETIF_F_GSO_ROBUST;
+
+ all &= one | NETIF_F_LLTX;
+
+ if (!(all & NETIF_F_ALL_CSUM))
+ all &= ~NETIF_F_SG;
+ if (!(all & NETIF_F_SG))
+ all &= ~NETIF_F_GSO_MASK;
+
+ return all;
+}
+EXPORT_SYMBOL(netdev_compute_features);
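/* A sketch of the intended bonding-style use; the slave array and the
 * starting mask are hypothetical. Each slave's features are folded
 * into the master's by repeated application, starting from the most
 * permissive set.
 */
static void example_recompute(struct net_device *master,
			      struct example_slave *slaves, int n)
{
	unsigned long features = NETIF_F_NO_CSUM | NETIF_F_SG | NETIF_F_GSO;
	int i;

	for (i = 0; i < n; i++)
		features = netdev_compute_features(features,
						   slaves[i].dev->features);
	master->features = features;
}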
+
/*
* Initialize the DEV module. At boot time this walks the device list and
* unhooks any devices that fail to initialise (normally hardware not
skb_queue_head_init(&queue->input_pkt_queue);
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
- set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
- queue->backlog_dev.weight = weight_p;
- queue->backlog_dev.poll = process_backlog;
- atomic_set(&queue->backlog_dev.refcnt, 1);
+
+ queue->backlog.poll = process_backlog;
+ queue->backlog.weight = weight_p;
}
netdev_dma_register();