git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blobdiff - drivers/net/ethernet/amazon/ena/ena_netdev.c
net: ena: fix bug that might cause hang after consecutive open/close interface.
[mirror_ubuntu-zesty-kernel.git] / drivers / net / ethernet / amazon / ena / ena_netdev.c
index cc8b13ebfa75a8b7c0757ce861589c7b7b075149..ae791de66870e77f1a5849350a1d6e5b7f64f92f 100644 (file)
@@ -80,14 +80,18 @@ static void ena_tx_timeout(struct net_device *dev)
 {
        struct ena_adapter *adapter = netdev_priv(dev);
 
+       /* Change the state of the device to trigger reset
+        * Check that we are not already in the middle of a triggered reset
+        */
+
+       if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+               return;
+
        u64_stats_update_begin(&adapter->syncp);
        adapter->dev_stats.tx_timeout++;
        u64_stats_update_end(&adapter->syncp);
 
        netif_err(adapter, tx_err, dev, "Transmit time out\n");
-
-       /* Change the state of the device to trigger reset */
-       set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
 }
 
 static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
@@ -129,7 +133,7 @@ static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
                int irq_idx = ENA_IO_IRQ_IDX(i);
 
                rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
-                                     adapter->msix_entries[irq_idx].vector);
+                                     pci_irq_vector(adapter->pdev, irq_idx));
                if (rc) {
                        free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
                        adapter->netdev->rx_cpu_rmap = NULL;
@@ -559,6 +563,7 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
  */
 static void ena_free_tx_bufs(struct ena_ring *tx_ring)
 {
+       bool print_once = true;
        u32 i;
 
        for (i = 0; i < tx_ring->ring_size; i++) {
@@ -570,9 +575,16 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring)
                if (!tx_info->skb)
                        continue;
 
-               netdev_notice(tx_ring->netdev,
-                             "free uncompleted tx skb qid %d idx 0x%x\n",
-                             tx_ring->qid, i);
+               if (print_once) {
+                       netdev_notice(tx_ring->netdev,
+                                     "free uncompleted tx skb qid %d idx 0x%x\n",
+                                     tx_ring->qid, i);
+                       print_once = false;
+               } else {
+                       netdev_dbg(tx_ring->netdev,
+                                  "free uncompleted tx skb qid %d idx 0x%x\n",
+                                  tx_ring->qid, i);
+               }
 
                ena_buf = tx_info->bufs;
                dma_unmap_single(tx_ring->dev,
@@ -1066,6 +1078,26 @@ inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring,
        rx_ring->per_napi_bytes = 0;
 }
 
+static inline void ena_unmask_interrupt(struct ena_ring *tx_ring,
+                                       struct ena_ring *rx_ring)
+{
+       struct ena_eth_io_intr_reg intr_reg;
+
+       /* Update intr register: rx intr delay,
+        * tx intr delay and interrupt unmask
+        */
+       ena_com_update_intr_reg(&intr_reg,
+                               rx_ring->smoothed_interval,
+                               tx_ring->smoothed_interval,
+                               true);
+
+       /* It is a shared MSI-X.
+        * Tx and Rx CQ have pointer to it.
+        * So we use one of them to reach the intr reg
+        */
+       ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+}
+
 static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring,
                                             struct ena_ring *rx_ring)
 {
@@ -1096,7 +1128,6 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
 {
        struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
        struct ena_ring *tx_ring, *rx_ring;
-       struct ena_eth_io_intr_reg intr_reg;
 
        u32 tx_work_done;
        u32 rx_work_done;
@@ -1109,7 +1140,8 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
 
        tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
 
-       if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
+       if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
+           test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
                napi_complete_done(napi, 0);
                return 0;
        }
@@ -1117,26 +1149,27 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
        tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
        rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
 
-       if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
-               napi_complete_done(napi, rx_work_done);
+       /* If the device is about to reset or go down, avoid unmasking
+        * the interrupt and return 0 so NAPI won't reschedule
+        */
+       if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
+                    test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
+               napi_complete_done(napi, 0);
+               ret = 0;
 
+       } else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
                napi_comp_call = 1;
-               /* Tx and Rx share the same interrupt vector */
-               if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
-                       ena_adjust_intr_moderation(rx_ring, tx_ring);
 
-               /* Update intr register: rx intr delay, tx intr delay and
-                * interrupt unmask
+               /* Update numa and unmask the interrupt only when scheduled
+                * from the interrupt context (vs from sk_busy_loop)
                 */
-               ena_com_update_intr_reg(&intr_reg,
-                                       rx_ring->smoothed_interval,
-                                       tx_ring->smoothed_interval,
-                                       true);
+               if (napi_complete_done(napi, rx_work_done)) {
+                       /* Tx and Rx share the same interrupt vector */
+                       if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
+                               ena_adjust_intr_moderation(rx_ring, tx_ring);
 
-               /* It is a shared MSI-X. Tx and Rx CQ have pointer to it.
-                * So we use one of them to reach the intr reg
-                */
-               ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+                       ena_unmask_interrupt(tx_ring, rx_ring);
+               }
 
                ena_update_ring_numa_node(tx_ring, rx_ring);
 
@@ -1181,13 +1214,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data)
 
 static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
 {
-       int i, msix_vecs, rc;
-
-       if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
-               netif_err(adapter, probe, adapter->netdev,
-                         "Error, MSI-X is already enabled\n");
-               return -EPERM;
-       }
+       int msix_vecs, rc;
 
        /* Reserved the max msix vectors we might need */
        msix_vecs = ENA_MAX_MSIX_VEC(num_queues);
@@ -1195,16 +1222,9 @@ static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
        netif_dbg(adapter, probe, adapter->netdev,
                  "trying to enable MSI-X, vectors %d\n", msix_vecs);
 
-       adapter->msix_entries = vzalloc(msix_vecs * sizeof(struct msix_entry));
-
-       if (!adapter->msix_entries)
-               return -ENOMEM;
-
-       for (i = 0; i < msix_vecs; i++)
-               adapter->msix_entries[i].entry = i;
-
-       rc = pci_enable_msix(adapter->pdev, adapter->msix_entries, msix_vecs);
-       if (rc != 0) {
+       rc = pci_alloc_irq_vectors(adapter->pdev, msix_vecs, msix_vecs,
+                       PCI_IRQ_MSIX);
+       if (rc < 0) {
                netif_err(adapter, probe, adapter->netdev,
                          "Failed to enable MSI-X, vectors %d rc %d\n",
                          msix_vecs, rc);
@@ -1221,7 +1241,6 @@ static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
        }
 
        adapter->msix_vecs = msix_vecs;
-       set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
 
        return 0;
 }
@@ -1237,7 +1256,7 @@ static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
                ena_intr_msix_mgmnt;
        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
-               adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
+               pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX);
        cpu = cpumask_first(cpu_online_mask);
        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
        cpumask_set_cpu(cpu,
@@ -1260,7 +1279,7 @@ static void ena_setup_io_intr(struct ena_adapter *adapter)
                adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
                adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
                adapter->irq_tbl[irq_idx].vector =
-                       adapter->msix_entries[irq_idx].vector;
+                       pci_irq_vector(adapter->pdev, irq_idx);
                adapter->irq_tbl[irq_idx].cpu = cpu;
 
                cpumask_set_cpu(cpu,
@@ -1298,12 +1317,6 @@ static int ena_request_io_irq(struct ena_adapter *adapter)
        struct ena_irq *irq;
        int rc = 0, i, k;
 
-       if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
-               netif_err(adapter, ifup, adapter->netdev,
-                         "Failed to request I/O IRQ: MSI-X is not enabled\n");
-               return -EINVAL;
-       }
-
        for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
                irq = &adapter->irq_tbl[i];
                rc = request_irq(irq->vector, irq->handler, flags, irq->name,
@@ -1362,16 +1375,6 @@ static void ena_free_io_irq(struct ena_adapter *adapter)
        }
 }
 
-static void ena_disable_msix(struct ena_adapter *adapter)
-{
-       if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
-               pci_disable_msix(adapter->pdev);
-
-       if (adapter->msix_entries)
-               vfree(adapter->msix_entries);
-       adapter->msix_entries = NULL;
-}
-
 static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
 {
        int i;
@@ -1488,6 +1491,11 @@ static int ena_up_complete(struct ena_adapter *adapter)
 
        ena_napi_enable_all(adapter);
 
+       /* Enable completion queues interrupt */
+       for (i = 0; i < adapter->num_queues; i++)
+               ena_unmask_interrupt(&adapter->tx_ring[i],
+                                    &adapter->rx_ring[i]);
+
        /* schedule napi in case we had pending packets
         * from the last time we disable napi
         */
@@ -1698,12 +1706,22 @@ static void ena_down(struct ena_adapter *adapter)
        adapter->dev_stats.interface_down++;
        u64_stats_update_end(&adapter->syncp);
 
-       /* After this point the napi handler won't enable the tx queue */
-       ena_napi_disable_all(adapter);
        netif_carrier_off(adapter->netdev);
        netif_tx_disable(adapter->netdev);
 
+       /* After this point the napi handler won't enable the tx queue */
+       ena_napi_disable_all(adapter);
+
        /* After destroy the queue there won't be any new interrupts */
+
+       if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
+               int rc;
+
+               rc = ena_com_dev_reset(adapter->ena_dev);
+               if (rc)
+                       dev_err(&adapter->pdev->dev, "Device reset failed\n");
+       }
+
        ena_destroy_all_io_queues(adapter);
 
        ena_disable_io_intr_sync(adapter);
@@ -2065,6 +2083,14 @@ static void ena_netpoll(struct net_device *netdev)
        struct ena_adapter *adapter = netdev_priv(netdev);
        int i;
 
+       /* Don't schedule NAPI if the driver is in the middle of reset
+        * or netdev is down.
+        */
+
+       if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags) ||
+           test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+               return;
+
        for (i = 0; i < adapter->num_queues; i++)
                napi_schedule(&adapter->ena_napi[i].napi);
 }
@@ -2169,28 +2195,46 @@ static struct rtnl_link_stats64 *ena_get_stats64(struct net_device *netdev,
                                                 struct rtnl_link_stats64 *stats)
 {
        struct ena_adapter *adapter = netdev_priv(netdev);
-       struct ena_admin_basic_stats ena_stats;
-       int rc;
+       struct ena_ring *rx_ring, *tx_ring;
+       unsigned int start;
+       u64 rx_drops;
+       int i;
 
        if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
                return NULL;
 
-       rc = ena_com_get_dev_basic_stats(adapter->ena_dev, &ena_stats);
-       if (rc)
-               return NULL;
+       for (i = 0; i < adapter->num_queues; i++) {
+               u64 bytes, packets;
 
-       stats->tx_bytes = ((u64)ena_stats.tx_bytes_high << 32) |
-               ena_stats.tx_bytes_low;
-       stats->rx_bytes = ((u64)ena_stats.rx_bytes_high << 32) |
-               ena_stats.rx_bytes_low;
+               tx_ring = &adapter->tx_ring[i];
 
-       stats->rx_packets = ((u64)ena_stats.rx_pkts_high << 32) |
-               ena_stats.rx_pkts_low;
-       stats->tx_packets = ((u64)ena_stats.tx_pkts_high << 32) |
-               ena_stats.tx_pkts_low;
+               do {
+                       start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
+                       packets = tx_ring->tx_stats.cnt;
+                       bytes = tx_ring->tx_stats.bytes;
+               } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
 
-       stats->rx_dropped = ((u64)ena_stats.rx_drops_high << 32) |
-               ena_stats.rx_drops_low;
+               stats->tx_packets += packets;
+               stats->tx_bytes += bytes;
+
+               rx_ring = &adapter->rx_ring[i];
+
+               do {
+                       start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
+                       packets = rx_ring->rx_stats.cnt;
+                       bytes = rx_ring->rx_stats.bytes;
+               } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
+
+               stats->rx_packets += packets;
+               stats->rx_bytes += bytes;
+       }
+
+       do {
+               start = u64_stats_fetch_begin_irq(&adapter->syncp);
+               rx_drops = adapter->dev_stats.rx_drops;
+       } while (u64_stats_fetch_retry_irq(&adapter->syncp, start));
+
+       stats->rx_dropped = rx_drops;
 
        stats->multicast = 0;
        stats->collisions = 0;
@@ -2353,6 +2397,8 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
         */
        ena_com_set_admin_polling_mode(ena_dev, true);
 
+       ena_config_host_info(ena_dev);
+
        /* Get Device Attributes*/
        rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
        if (rc) {
@@ -2377,11 +2423,10 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
 
        *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
 
-       ena_config_host_info(ena_dev);
-
        return 0;
 
 err_admin_init:
+       ena_com_delete_host_info(ena_dev);
        ena_com_admin_destroy(ena_dev);
 err_mmio_read_less:
        ena_com_mmio_reg_read_request_destroy(ena_dev);
@@ -2417,8 +2462,7 @@ static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
        return 0;
 
 err_disable_msix:
-       ena_disable_msix(adapter);
-
+       pci_free_irq_vectors(adapter->pdev);
        return rc;
 }
 
@@ -2433,6 +2477,14 @@ static void ena_fw_reset_device(struct work_struct *work)
        bool dev_up, wd_state;
        int rc;
 
+       if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+               dev_err(&pdev->dev,
+                       "device reset schedule while reset bit is off\n");
+               return;
+       }
+
+       netif_carrier_off(netdev);
+
        del_timer_sync(&adapter->timer_service);
 
        rtnl_lock();
@@ -2446,15 +2498,9 @@ static void ena_fw_reset_device(struct work_struct *work)
         */
        ena_close(netdev);
 
-       rc = ena_com_dev_reset(ena_dev);
-       if (rc) {
-               dev_err(&pdev->dev, "Device reset failed\n");
-               goto err;
-       }
-
        ena_free_mgmnt_irq(adapter);
 
-       ena_disable_msix(adapter);
+       pci_free_irq_vectors(adapter->pdev);
 
        ena_com_abort_admin_commands(ena_dev);
 
@@ -2464,6 +2510,8 @@ static void ena_fw_reset_device(struct work_struct *work)
 
        ena_com_mmio_reg_read_request_destroy(ena_dev);
 
+       clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+
        /* Finish with the destroy part. Start the init part */
 
        rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
@@ -2503,12 +2551,14 @@ static void ena_fw_reset_device(struct work_struct *work)
        return;
 err_disable_msix:
        ena_free_mgmnt_irq(adapter);
-       ena_disable_msix(adapter);
+       pci_free_irq_vectors(adapter->pdev);
 err_device_destroy:
        ena_com_admin_destroy(ena_dev);
 err:
        rtnl_unlock();
 
+       clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
+
        dev_err(&pdev->dev,
                "Reset attempt failed. Can not reset the device\n");
 }
@@ -2527,6 +2577,9 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
        if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
                return;
 
+       if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+               return;
+
        budget = ENA_MONITORED_TX_QUEUES;
 
        for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
@@ -2626,7 +2679,7 @@ static void ena_timer_service(unsigned long data)
        if (host_info)
                ena_update_host_info(host_info, adapter->netdev);
 
-       if (unlikely(test_and_clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+       if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
                netif_err(adapter, drv, adapter->netdev,
                          "Trigger reset is on\n");
                ena_dump_stats_to_dmesg(adapter);
@@ -2660,7 +2713,7 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev,
                io_sq_num = get_feat_ctx->max_queues.max_sq_num;
        }
 
-       io_queue_num = min_t(int, num_possible_cpus(), ENA_MAX_NUM_IO_QUEUES);
+       io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
        io_queue_num = min_t(int, io_queue_num, io_sq_num);
        io_queue_num = min_t(int, io_queue_num,
                             get_feat_ctx->max_queues.max_cq_num);
@@ -2722,7 +2775,6 @@ static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
        netdev->features =
                dev_features |
                NETIF_F_SG |
-               NETIF_F_NTUPLE |
                NETIF_F_RXHASH |
                NETIF_F_HIGHDMA;
 
@@ -3033,7 +3085,7 @@ err_rss:
 err_free_msix:
        ena_com_dev_reset(ena_dev);
        ena_free_mgmnt_irq(adapter);
-       ena_disable_msix(adapter);
+       pci_free_irq_vectors(adapter->pdev);
 err_worker_destroy:
        ena_com_destroy_interrupt_moderation(ena_dev);
        del_timer(&adapter->timer_service);
@@ -3093,12 +3145,6 @@ static void ena_remove(struct pci_dev *pdev)
        struct ena_com_dev *ena_dev;
        struct net_device *netdev;
 
-       if (!adapter)
-               /* This device didn't load properly and it's resources
-                * already released, nothing to do
-                */
-               return;
-
        ena_dev = adapter->ena_dev;
        netdev = adapter->netdev;
 
@@ -3118,11 +3164,13 @@ static void ena_remove(struct pci_dev *pdev)
 
        cancel_work_sync(&adapter->resume_io_task);
 
-       ena_com_dev_reset(ena_dev);
+       /* Reset the device only if the device is running. */
+       if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
+               ena_com_dev_reset(ena_dev);
 
        ena_free_mgmnt_irq(adapter);
 
-       ena_disable_msix(adapter);
+       pci_free_irq_vectors(adapter->pdev);
 
        free_netdev(netdev);