[MYRI10GE]: Use LRO.

diff --git a/net/core/dev.c b/net/core/dev.c
index 3ba63aaa3001737adc66d7d9feba6317e5674aeb..29cf00c5d865df30b30ef1407299631ebc24c109 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -220,7 +220,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
  *     Device drivers call our routines to queue packets here. We empty the
  *     queue in the local softnet handler.
  */
-DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
+
+DEFINE_PER_CPU(struct softnet_data, softnet_data);
 
 #ifdef CONFIG_SYSFS
 extern int netdev_sysfs_init(void);
@@ -817,7 +818,9 @@ int dev_alloc_name(struct net_device *dev, const char *name)
  */
 int dev_change_name(struct net_device *dev, char *newname)
 {
+       char oldname[IFNAMSIZ];
        int err = 0;
+       int ret;
 
        ASSERT_RTNL();
 
@@ -827,6 +830,8 @@ int dev_change_name(struct net_device *dev, char *newname)
        if (!dev_valid_name(newname))
                return -EINVAL;
 
+       memcpy(oldname, dev->name, IFNAMSIZ);
+
        if (strchr(newname, '%')) {
                err = dev_alloc_name(dev, newname);
                if (err < 0)
@@ -838,10 +843,28 @@ int dev_change_name(struct net_device *dev, char *newname)
        else
                strlcpy(dev->name, newname, IFNAMSIZ);
 
+rollback:
        device_rename(&dev->dev, dev->name);
+
+       write_lock_bh(&dev_base_lock);
        hlist_del(&dev->name_hlist);
        hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
-       raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+       write_unlock_bh(&dev_base_lock);
+
+       ret = raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+       ret = notifier_to_errno(ret);
+
+       if (ret) {
+               if (err) {
+                       printk(KERN_ERR
+                              "%s: name change rollback failed: %d.\n",
+                              dev->name, ret);
+               } else {
+                       err = ret;
+                       memcpy(dev->name, oldname, IFNAMSIZ);
+                       goto rollback;
+               }
+       }
 
        return err;
 }
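
The rewritten dev_change_name() keeps a copy of the old name and, if the NETDEV_CHANGENAME notifier chain vetoes the rename, restores it and replays the notification exactly once; the err check stops a second failure from looping forever. Below is a minimal userspace sketch of this single-retry rollback pattern — notify_change() and the device names are hypothetical stand-ins, not kernel APIs:

#include <stdio.h>
#include <string.h>

#define IFNAMSIZ 16

/* Hypothetical stand-in for the notifier chain: rejects "bad0". */
static int notify_change(const char *name)
{
	return strcmp(name, "bad0") == 0 ? -1 : 0;
}

static int change_name(char *cur, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;

	memcpy(oldname, cur, IFNAMSIZ);
	snprintf(cur, IFNAMSIZ, "%s", newname);

rollback:
	ret = notify_change(cur);
	if (ret) {
		if (err) {
			/* The rollback notification failed too. */
			fprintf(stderr, "%s: rollback failed: %d\n", cur, ret);
		} else {
			/* First failure: restore old name, notify once more. */
			err = ret;
			memcpy(cur, oldname, IFNAMSIZ);
			goto rollback;
		}
	}
	return err;
}

int main(void)
{
	char name[IFNAMSIZ] = "eth0";

	/* Prints "rename -> -1, name is now eth0". */
	printf("rename -> %d, name is now %s\n",
	       change_name(name, "bad0"), name);
	return 0;
}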
@@ -996,16 +1019,12 @@ int dev_close(struct net_device *dev)
        clear_bit(__LINK_STATE_START, &dev->state);
 
        /* Synchronize to scheduled poll. We cannot touch poll list,
-        * it can be even on different cpu. So just clear netif_running(),
-        * and wait when poll really will happen. Actually, the best place
-        * for this is inside dev->stop() after device stopped its irq
-        * engine, but this requires more changes in devices. */
-
+        * it can even be on a different cpu. So just clear netif_running().
+        *
+        * dev->stop() will invoke napi_disable() on all of its
+        * napi_struct instances on this device.
+        */
        smp_mb__after_clear_bit(); /* Commit netif_running(). */
-       while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
-               /* No hurry. */
-               msleep(1);
-       }
 
        /*
         *      Call the device specific close. This cannot fail.
@@ -1054,20 +1073,43 @@ int dev_close(struct net_device *dev)
 int register_netdevice_notifier(struct notifier_block *nb)
 {
        struct net_device *dev;
+       struct net_device *last;
        int err;
 
        rtnl_lock();
        err = raw_notifier_chain_register(&netdev_chain, nb);
-       if (!err) {
-               for_each_netdev(dev) {
-                       nb->notifier_call(nb, NETDEV_REGISTER, dev);
+       if (err)
+               goto unlock;
 
-                       if (dev->flags & IFF_UP)
-                               nb->notifier_call(nb, NETDEV_UP, dev);
-               }
+       for_each_netdev(dev) {
+               err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
+               err = notifier_to_errno(err);
+               if (err)
+                       goto rollback;
+
+               if (!(dev->flags & IFF_UP))
+                       continue;
+
+               nb->notifier_call(nb, NETDEV_UP, dev);
        }
+
+unlock:
        rtnl_unlock();
        return err;
+
+rollback:
+       last = dev;
+       for_each_netdev(dev) {
+               if (dev == last)
+                       break;
+
+               if (dev->flags & IFF_UP) {
+                       nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
+                       nb->notifier_call(nb, NETDEV_DOWN, dev);
+               }
+               nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+       }
+       goto unlock;
 }
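
register_netdevice_notifier() now propagates a veto from the new subscriber: if NETDEV_REGISTER fails for some device, every device already notified is walked again and sent the matching teardown events before the error is returned. A compact userspace sketch of the same unwind-to-the-failure-point loop — the callbacks and the failing index are hypothetical:

#include <stdio.h>

#define NDEV 4

/* Hypothetical per-device callback: device 2 rejects registration. */
static int do_register(int dev)
{
	return dev == 2 ? -1 : 0;
}

static void do_unregister(int dev)
{
	printf("unwinding device %d\n", dev);
}

static int register_all(void)
{
	int dev, last, err;

	for (dev = 0; dev < NDEV; dev++) {
		err = do_register(dev);
		if (err)
			goto rollback;
	}
	return 0;

rollback:
	/* Undo only the devices that were notified before the failure. */
	last = dev;
	for (dev = 0; dev < NDEV; dev++) {
		if (dev == last)
			break;
		do_unregister(dev);
	}
	return err;
}

int main(void)
{
	/* Unwinds devices 0 and 1, then returns -1. */
	printf("register_all() -> %d\n", register_all());
	return 0;
}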
 
 /**
@@ -1188,21 +1230,21 @@ void __netif_schedule(struct net_device *dev)
 }
 EXPORT_SYMBOL(__netif_schedule);
 
-void __netif_rx_schedule(struct net_device *dev)
+void dev_kfree_skb_irq(struct sk_buff *skb)
 {
-       unsigned long flags;
+       if (atomic_dec_and_test(&skb->users)) {
+               struct softnet_data *sd;
+               unsigned long flags;
 
-       local_irq_save(flags);
-       dev_hold(dev);
-       list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
-       if (dev->quota < 0)
-               dev->quota += dev->weight;
-       else
-               dev->quota = dev->weight;
-       __raise_softirq_irqoff(NET_RX_SOFTIRQ);
-       local_irq_restore(flags);
+               local_irq_save(flags);
+               sd = &__get_cpu_var(softnet_data);
+               skb->next = sd->completion_queue;
+               sd->completion_queue = skb;
+               raise_softirq_irqoff(NET_TX_SOFTIRQ);
+               local_irq_restore(flags);
+       }
 }
-EXPORT_SYMBOL(__netif_rx_schedule);
+EXPORT_SYMBOL(dev_kfree_skb_irq);
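
dev_kfree_skb_irq() cannot free an skb from hard-IRQ context, so it chains the buffer onto the per-CPU completion_queue and raises NET_TX_SOFTIRQ; net_tx_action() later detaches the whole list and does the actual freeing. A userspace sketch of that push-now, drain-later pattern — struct buf and the queue here are illustrative, not kernel types:

#include <stdio.h>
#include <stdlib.h>

struct buf {
	struct buf *next;
	int id;
};

/* Stand-in for the per-CPU softnet_data completion queue. */
static struct buf *completion_queue;

/* "IRQ" side: O(1) push; per-CPU with IRQs off needs no lock. */
static void buf_free_deferred(struct buf *b)
{
	b->next = completion_queue;
	completion_queue = b;
	/* the kernel would raise_softirq_irqoff(NET_TX_SOFTIRQ) here */
}

/* "Softirq" side: detach the list in one step, then free at leisure. */
static void drain_completion_queue(void)
{
	struct buf *list = completion_queue;

	completion_queue = NULL;
	while (list) {
		struct buf *next = list->next;

		printf("freeing buf %d\n", list->id);
		free(list);
		list = next;
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct buf *b = malloc(sizeof(*b));

		b->id = i;
		buf_free_deferred(b);
	}
	drain_completion_queue();
	return 0;
}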
 
 void dev_kfree_skb_any(struct sk_buff *skb)
 {
@@ -1214,7 +1256,12 @@ void dev_kfree_skb_any(struct sk_buff *skb)
 EXPORT_SYMBOL(dev_kfree_skb_any);
 
 
-/* Hot-plugging. */
+/**
+ * netif_device_detach - mark device as removed
+ * @dev: network device
+ *
+ * Mark device as removed from the system and therefore no longer available.
+ */
 void netif_device_detach(struct net_device *dev)
 {
        if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
@@ -1224,6 +1271,12 @@ void netif_device_detach(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_device_detach);
 
+/**
+ * netif_device_attach - mark device as attached
+ * @dev: network device
+ *
+ * Mark device as attached to the system and restart if needed.
+ */
 void netif_device_attach(struct net_device *dev)
 {
        if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
@@ -1685,7 +1738,7 @@ enqueue:
                        return NET_RX_SUCCESS;
                }
 
-               netif_rx_schedule(&queue->backlog_dev);
+               napi_schedule(&queue->backlog);
                goto enqueue;
        }
 
@@ -1726,6 +1779,7 @@ static inline struct net_device *skb_bond(struct sk_buff *skb)
        return dev;
 }
 
+
 static void net_tx_action(struct softirq_action *h)
 {
        struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -1882,7 +1936,7 @@ int netif_receive_skb(struct sk_buff *skb)
        __be16 type;
 
        /* if we've gotten here through NAPI, check netpoll */
-       if (skb->dev->poll && netpoll_rx(skb))
+       if (netpoll_receive_skb(skb))
                return NET_RX_DROP;
 
        if (!skb->tstamp.tv64)
@@ -1972,22 +2026,25 @@ out:
        return ret;
 }
 
-static int process_backlog(struct net_device *backlog_dev, int *budget)
+static int process_backlog(struct napi_struct *napi, int quota)
 {
        int work = 0;
-       int quota = min(backlog_dev->quota, *budget);
        struct softnet_data *queue = &__get_cpu_var(softnet_data);
        unsigned long start_time = jiffies;
 
-       backlog_dev->weight = weight_p;
-       for (;;) {
+       napi->weight = weight_p;
+       do {
                struct sk_buff *skb;
                struct net_device *dev;
 
                local_irq_disable();
                skb = __skb_dequeue(&queue->input_pkt_queue);
-               if (!skb)
-                       goto job_done;
+               if (!skb) {
+                       __napi_complete(napi);
+                       local_irq_enable();
+                       break;
+               }
+
                local_irq_enable();
 
                dev = skb->dev;
@@ -1995,67 +2052,86 @@ static int process_backlog(struct net_device *backlog_dev, int *budget)
                netif_receive_skb(skb);
 
                dev_put(dev);
+       } while (++work < quota && jiffies == start_time);
 
-               work++;
-
-               if (work >= quota || jiffies - start_time > 1)
-                       break;
-
-       }
-
-       backlog_dev->quota -= work;
-       *budget -= work;
-       return -1;
-
-job_done:
-       backlog_dev->quota -= work;
-       *budget -= work;
+       return work;
+}
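
The rewritten process_backlog() doubles as the template for any ->poll() method under the napi_struct API: consume at most quota packets, call __napi_complete() only if the queue drains before the quota runs out, and report the work actually done. A minimal userspace sketch of that contract — my_poll() and the fake queue are illustrative only:

#include <stdio.h>

/* Fake RX queue: pretend five packets are pending. */
static int pending = 5;

static int dequeue_one(void)
{
	return pending > 0 ? pending-- : 0;
}

/* Minimal ->poll() in the new style: return the work done, and only
 * "complete" (re-enable device interrupts, in a real driver) when the
 * queue drained before the quota was used up. */
static int my_poll(int quota)
{
	int work = 0;

	do {
		if (!dequeue_one()) {
			printf("  complete: queue drained\n");
			break;
		}
		/* ... hand the packet to netif_receive_skb() ... */
	} while (++work < quota);

	return work;
}

int main(void)
{
	printf("work = %d\n", my_poll(4));	/* full quota consumed */
	printf("work = %d\n", my_poll(4));	/* drains and completes */
	return 0;
}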
 
-       list_del(&backlog_dev->poll_list);
-       smp_mb__before_clear_bit();
-       netif_poll_enable(backlog_dev);
+/**
+ * __napi_schedule - schedule for receive
+ * @n: entry to schedule
+ *
+ * The entry's receive function will be scheduled to run.
+ */
+void fastcall __napi_schedule(struct napi_struct *n)
+{
+       unsigned long flags;
 
-       local_irq_enable();
-       return 0;
+       local_irq_save(flags);
+       list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
+       __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+       local_irq_restore(flags);
 }
+EXPORT_SYMBOL(__napi_schedule);
+
 
 static void net_rx_action(struct softirq_action *h)
 {
-       struct softnet_data *queue = &__get_cpu_var(softnet_data);
+       struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
        unsigned long start_time = jiffies;
        int budget = netdev_budget;
        void *have;
 
        local_irq_disable();
 
-       while (!list_empty(&queue->poll_list)) {
-               struct net_device *dev;
+       while (!list_empty(list)) {
+               struct napi_struct *n;
+               int work, weight;
 
-               if (budget <= 0 || jiffies - start_time > 1)
+               /* If the softirq window is exhausted then punt.
+                *
+                * Note that this is a slight policy change from the
+                * previous NAPI code, which would allow up to 2
+                * jiffies to pass before breaking out.  The test
+                * used to be "jiffies - start_time > 1".
+                */
+               if (unlikely(budget <= 0 || jiffies != start_time))
                        goto softnet_break;
 
                local_irq_enable();
 
-               dev = list_entry(queue->poll_list.next,
-                                struct net_device, poll_list);
-               have = netpoll_poll_lock(dev);
+               /* Even though interrupts have been re-enabled, this
+                * access is safe because interrupts can only add new
+                * entries to the tail of this list, and only ->poll()
+                * calls can remove this head entry from the list.
+                */
+               n = list_entry(list->next, struct napi_struct, poll_list);
 
-               if (dev->quota <= 0 || dev->poll(dev, &budget)) {
-                       netpoll_poll_unlock(have);
-                       local_irq_disable();
-                       list_move_tail(&dev->poll_list, &queue->poll_list);
-                       if (dev->quota < 0)
-                               dev->quota += dev->weight;
-                       else
-                               dev->quota = dev->weight;
-               } else {
-                       netpoll_poll_unlock(have);
-                       dev_put(dev);
-                       local_irq_disable();
-               }
+               have = netpoll_poll_lock(n);
+
+               weight = n->weight;
+
+               work = n->poll(n, weight);
+
+               WARN_ON_ONCE(work > weight);
+
+               budget -= work;
+
+               local_irq_disable();
+
+               /* Drivers must not modify the NAPI state if they
+                * consume the entire weight.  In such cases this code
+                * still "owns" the NAPI instance and therefore can
+                * move the instance around on the list at-will.
+                */
+               if (unlikely(work == weight))
+                       list_move_tail(&n->poll_list, list);
+
+               netpoll_poll_unlock(have);
        }
 out:
        local_irq_enable();
+
 #ifdef CONFIG_NET_DMA
        /*
         * There may not be any more sk_buffs coming right now, so push
@@ -2070,6 +2146,7 @@ out:
                }
        }
 #endif
+
        return;
 
 softnet_break:
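
net_rx_action() now does the accounting itself: each ->poll() returns its work, the global budget shrinks by that amount, and an instance that consumed its entire weight is rotated to the tail so other devices get a turn. The sketch below models that budget/weight arithmetic with made-up numbers; the real code also removes drained instances from the list via __napi_complete(), which this flat round-robin elides:

#include <stdio.h>

#define NDEV 3

int main(void)
{
	int backlog[NDEV] = { 7, 1, 4 };	/* pending packets per device */
	int weight = 3;				/* per-device quota per poll */
	int budget = 10;			/* global quota per softirq run */
	int i = 0;

	while (budget > 0 && (backlog[0] || backlog[1] || backlog[2])) {
		/* poll: never hand back more work than the weight */
		int work = backlog[i] < weight ? backlog[i] : weight;

		backlog[i] -= work;
		budget -= work;
		printf("dev %d: work=%d left=%d budget=%d\n",
		       i, work, backlog[i], budget);
		i = (i + 1) % NDEV;		/* rotate to the tail */
	}
	return 0;
}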
@@ -2629,7 +2706,7 @@ void __dev_set_rx_mode(struct net_device *dev)
                return;
 
        if (!netif_device_present(dev))
-               return;
+               return;
 
        if (dev->set_rx_mode)
                dev->set_rx_mode(dev);
@@ -2715,26 +2792,14 @@ int __dev_addr_add(struct dev_addr_list **list, int *count,
        return 0;
 }
 
-static void __dev_addr_discard(struct dev_addr_list **list)
-{
-       struct dev_addr_list *tmp;
-
-       while (*list != NULL) {
-               tmp = *list;
-               *list = tmp->next;
-               if (tmp->da_users > tmp->da_gusers)
-                       printk("__dev_addr_discard: address leakage! "
-                              "da_users=%d\n", tmp->da_users);
-               kfree(tmp);
-       }
-}
-
 /**
  *     dev_unicast_delete      - Release secondary unicast address.
  *     @dev: device
+ *     @addr: address to delete
+ *     @alen: length of @addr
  *
  *     Release reference to a secondary unicast address and remove it
- *     from the device if the reference count drop to zero.
+ *     from the device if the reference count drops to zero.
  *
  *     The caller must hold the rtnl_mutex.
  */
@@ -2756,6 +2821,8 @@ EXPORT_SYMBOL(dev_unicast_delete);
 /**
  *     dev_unicast_add         - add a secondary unicast address
  *     @dev: device
+ *     @addr: address to add
+ *     @alen: length of @addr
  *
  *     Add a secondary unicast address to the device or increase
  *     the reference count if it already exists.
@@ -2777,23 +2844,30 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen)
 }
 EXPORT_SYMBOL(dev_unicast_add);
 
-static void dev_unicast_discard(struct net_device *dev)
+static void __dev_addr_discard(struct dev_addr_list **list)
 {
-       netif_tx_lock_bh(dev);
-       __dev_addr_discard(&dev->uc_list);
-       dev->uc_count = 0;
-       netif_tx_unlock_bh(dev);
-}
+       struct dev_addr_list *tmp;
 
-/*
- *     Discard multicast list when a device is downed
- */
+       while (*list != NULL) {
+               tmp = *list;
+               *list = tmp->next;
+               if (tmp->da_users > tmp->da_gusers)
+                       printk("__dev_addr_discard: address leakage! "
+                              "da_users=%d\n", tmp->da_users);
+               kfree(tmp);
+       }
+}
 
-static void dev_mc_discard(struct net_device *dev)
+static void dev_addr_discard(struct net_device *dev)
 {
        netif_tx_lock_bh(dev);
+
+       __dev_addr_discard(&dev->uc_list);
+       dev->uc_count = 0;
+
        __dev_addr_discard(&dev->mc_list);
        dev->mc_count = 0;
+
        netif_tx_unlock_bh(dev);
 }
 
@@ -3340,7 +3414,7 @@ int register_netdevice(struct net_device *dev)
 
        if (!dev_valid_name(dev->name)) {
                ret = -EINVAL;
-               goto out;
+               goto err_uninit;
        }
 
        dev->ifindex = dev_new_index();
@@ -3354,7 +3428,7 @@ int register_netdevice(struct net_device *dev)
                        = hlist_entry(p, struct net_device, name_hlist);
                if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
                        ret = -EEXIST;
-                       goto out;
+                       goto err_uninit;
                }
        }
 
@@ -3414,7 +3488,7 @@ int register_netdevice(struct net_device *dev)
 
        ret = netdev_register_sysfs(dev);
        if (ret)
-               goto out;
+               goto err_uninit;
        dev->reg_state = NETREG_REGISTERED;
 
        /*
@@ -3433,12 +3507,18 @@ int register_netdevice(struct net_device *dev)
        write_unlock_bh(&dev_base_lock);
 
        /* Notify protocols, that a new device appeared. */
-       raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
-
-       ret = 0;
+       ret = raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+       ret = notifier_to_errno(ret);
+       if (ret)
+               unregister_netdevice(dev);
 
 out:
        return ret;
+
+err_uninit:
+       if (dev->uninit)
+               dev->uninit(dev);
+       goto out;
 }
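
The new err_uninit label fixes a leak: once dev->init() has run, any later failure in register_netdevice() must call dev->uninit() before bailing out, which the old direct "goto out" skipped. A small sketch of the goto-based unwind idiom, with hypothetical stage functions:

#include <stdio.h>

static int dev_init(void)	{ printf("init\n"); return 0; }
static void dev_uninit(void)	{ printf("uninit\n"); }
static int validate_name(void)	{ return -22; /* say, -EINVAL */ }

static int register_dev(void)
{
	int ret;

	ret = dev_init();
	if (ret)
		goto out;		/* nothing to undo yet */

	ret = validate_name();
	if (ret)
		goto err_uninit;	/* init succeeded, must undo it */

out:
	return ret;

err_uninit:
	dev_uninit();
	goto out;
}

int main(void)
{
	printf("register_dev() -> %d\n", register_dev());
	return 0;
}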
 
 /**
@@ -3631,7 +3711,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
        /* ensure 32-byte alignment of both the device and private area */
        alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
-                    (sizeof(struct net_device_subqueue) * queue_count)) &
+                    (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
                     ~NETDEV_ALIGN_CONST;
        alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
 
@@ -3649,13 +3729,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
                dev->priv = ((char *)dev +
                             ((sizeof(struct net_device) +
                               (sizeof(struct net_device_subqueue) *
-                               queue_count) + NETDEV_ALIGN_CONST)
+                               (queue_count - 1)) + NETDEV_ALIGN_CONST)
                              & ~NETDEV_ALIGN_CONST));
        }
 
        dev->egress_subqueue_count = queue_count;
 
        dev->get_stats = internal_stats;
+       netpoll_netdev_init(dev);
        setup(dev);
        strcpy(dev->name, name);
        return dev;
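
The sizing fix reflects that struct net_device already ends with a one-element net_device_subqueue array, so only queue_count - 1 extra elements belong in the allocation. A userspace sketch of that trailing-array arithmetic — the structs are stand-ins, using the pre-C99 one-element-array idiom the kernel used at the time:

#include <stdio.h>
#include <stdlib.h>

struct subqueue {
	int len;
};

struct device {
	int ifindex;
	struct subqueue egress[1];	/* really queue_count entries */
};

static struct device *alloc_device(int queue_count)
{
	/* One subqueue already lives inside the struct, so append
	 * only queue_count - 1 more. */
	size_t size = sizeof(struct device) +
		      sizeof(struct subqueue) * (queue_count - 1);

	printf("allocating %zu bytes for %d queues\n", size, queue_count);
	return calloc(1, size);
}

int main(void)
{
	struct device *dev = alloc_device(4);

	for (int i = 0; i < 4; i++)
		dev->egress[i].len = i;	/* all four fit in the allocation */
	free(dev);
	return 0;
}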
@@ -3751,8 +3832,7 @@ void unregister_netdevice(struct net_device *dev)
        /*
         *      Flush the unicast and multicast chains
         */
-       dev_unicast_discard(dev);
-       dev_mc_discard(dev);
+       dev_addr_discard(dev);
 
        if (dev->uninit)
                dev->uninit(dev);
@@ -3838,9 +3918,11 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 
 #ifdef CONFIG_NET_DMA
 /**
- * net_dma_rebalance -
- * This is called when the number of channels allocated to the net_dma_client
- * changes.  The net_dma_client tries to have one DMA channel per CPU.
+ * net_dma_rebalance - try to maintain one DMA channel per CPU
+ * @net_dma: DMA client and associated data (lock, channels, channel_mask)
+ *
+ * This is called when the number of channels allocated to the net_dma client
+ * changes.  The net_dma client tries to have one DMA channel per CPU.
  */
 
 static void net_dma_rebalance(struct net_dma *net_dma)
@@ -3877,7 +3959,7 @@ static void net_dma_rebalance(struct net_dma *net_dma)
  * netdev_dma_event - event callback for the net_dma_client
  * @client: should always be net_dma_client
  * @chan: DMA channel for the event
- * @event: event type
+ * @state: DMA state to be handled
  */
 static enum dma_state_client
 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
@@ -3944,6 +4026,45 @@ static int __init netdev_dma_register(void)
 static int __init netdev_dma_register(void) { return -ENODEV; }
 #endif /* CONFIG_NET_DMA */
 
+/**
+ *     netdev_compute_features - compute conjunction of two feature sets
+ *     @all: first feature set
+ *     @one: second feature set
+ *
+ *     Computes a new feature set after adding a device with feature set
+ *     @one to the master device with current feature set @all.  Returns
+ *     the new feature set.
+ */
+int netdev_compute_features(unsigned long all, unsigned long one)
+{
+       /* if device needs checksumming, downgrade to hw checksumming */
+       if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
+               all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
+
+       /* if device can't do all checksum, downgrade to ipv4/ipv6 */
+       if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
+               all ^= NETIF_F_HW_CSUM
+                       | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+
+       if (one & NETIF_F_GSO)
+               one |= NETIF_F_GSO_SOFTWARE;
+       one |= NETIF_F_GSO;
+
+       /* If even one device supports robust GSO, enable it for all. */
+       if (one & NETIF_F_GSO_ROBUST)
+               all |= NETIF_F_GSO_ROBUST;
+
+       all &= one | NETIF_F_LLTX;
+
+       if (!(all & NETIF_F_ALL_CSUM))
+               all &= ~NETIF_F_SG;
+       if (!(all & NETIF_F_SG))
+               all &= ~NETIF_F_GSO_MASK;
+
+       return all;
+}
+EXPORT_SYMBOL(netdev_compute_features);
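
Because netdev_compute_features() is pure bit arithmetic, its behaviour is easy to check outside the kernel. The demo below copies the logic verbatim but substitutes made-up flag values; the real NETIF_F_* constants and masks come from linux/netdevice.h:

#include <stdio.h>

/* Hypothetical flag values for illustration only. */
#define NETIF_F_SG		0x001
#define NETIF_F_IP_CSUM		0x002
#define NETIF_F_NO_CSUM		0x004
#define NETIF_F_HW_CSUM		0x008
#define NETIF_F_IPV6_CSUM	0x010
#define NETIF_F_LLTX		0x020
#define NETIF_F_GSO		0x040
#define NETIF_F_GSO_ROBUST	0x080
#define NETIF_F_GSO_SOFTWARE	0x100
#define NETIF_F_ALL_CSUM	(NETIF_F_NO_CSUM | NETIF_F_HW_CSUM | \
				 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)
#define NETIF_F_GSO_MASK	(NETIF_F_GSO | NETIF_F_GSO_SOFTWARE | \
				 NETIF_F_GSO_ROBUST)

static unsigned long compute_features(unsigned long all, unsigned long one)
{
	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
		all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
	if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
		all ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
	if (one & NETIF_F_GSO)
		one |= NETIF_F_GSO_SOFTWARE;
	one |= NETIF_F_GSO;
	if (one & NETIF_F_GSO_ROBUST)
		all |= NETIF_F_GSO_ROBUST;
	all &= one | NETIF_F_LLTX;
	if (!(all & NETIF_F_ALL_CSUM))
		all &= ~NETIF_F_SG;
	if (!(all & NETIF_F_SG))
		all &= ~NETIF_F_GSO_MASK;
	return all;
}

int main(void)
{
	/* Master claims everything; slave only does IPv4 csum + SG.
	 * Expect the result to drop to SG | IP_CSUM | GSO (0x43). */
	unsigned long all = NETIF_F_NO_CSUM | NETIF_F_SG | NETIF_F_GSO;
	unsigned long one = NETIF_F_IP_CSUM | NETIF_F_SG;

	printf("combined = %#lx\n", compute_features(all, one));
	return 0;
}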
+
 /*
  *     Initialize the DEV module. At boot time this walks the device list and
  *     unhooks any devices that fail to initialise (normally hardware not
@@ -3988,10 +4109,9 @@ static int __init net_dev_init(void)
                skb_queue_head_init(&queue->input_pkt_queue);
                queue->completion_queue = NULL;
                INIT_LIST_HEAD(&queue->poll_list);
-               set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
-               queue->backlog_dev.weight = weight_p;
-               queue->backlog_dev.poll = process_backlog;
-               atomic_set(&queue->backlog_dev.refcnt, 1);
+
+               queue->backlog.poll = process_backlog;
+               queue->backlog.weight = weight_p;
        }
 
        netdev_dma_register();