git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/commitdiff
net/mlx4_core: Avoid delays during VF driver device shutdown
author Jack Morgenstein <jackm@dev.mellanox.co.il>
Tue, 28 Mar 2017 15:55:32 +0000 (11:55 -0400)
committer Tim Gardner <tim.gardner@canonical.com>
Tue, 28 Mar 2017 20:17:54 +0000 (14:17 -0600)
BugLink: http://bugs.launchpad.net/bugs/1672785
Some Hypervisors detach VFs from VMs by instantly causing an FLR event
to be generated for a VF.

In the mlx4 case, this will cause that VF's comm channel to be disabled
before the VM has an opportunity to invoke the VF device's "shutdown"
method.

For such Hypervisors, there is a race condition between the VF's
shutdown method and its internal-error detection/reset thread.

The internal-error detection/reset thread (which runs every 5 seconds) also
detects a disabled comm channel. If the internal-error detection/reset
flow wins the race, we still get delays (while that flow tries repeatedly
to detect comm-channel recovery).

The cited commit fixed the command timeout problem when the
internal-error detection/reset flow loses the race.

This commit avoids the unneeded delays when the internal-error
detection/reset flow wins.

Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reported-by: Simon Xiao <sixiao@microsoft.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4)
Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com>
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
drivers/net/ethernet/mellanox/mlx4/cmd.c
drivers/net/ethernet/mellanox/mlx4/main.c
include/linux/mlx4/device.h

index a49072b4fa520c1ba218a77438e8f9f4c653ef75..2710de5a04924504ba68539a74396e1806bddd0b 100644 (file)
@@ -2304,6 +2304,17 @@ static int sync_toggles(struct mlx4_dev *dev)
                rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read));
                if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) {
                        /* PCI might be offline */
+
+                       /* If device removal has been requested,
+                        * do not continue retrying.
+                        */
+                       if (dev->persist->interface_state &
+                           MLX4_INTERFACE_STATE_NOWAIT) {
+                               mlx4_warn(dev,
+                                         "communication channel is offline\n");
+                               return -EIO;
+                       }
+
                        msleep(100);
                        wr_toggle = swab32(readl(&priv->mfunc.comm->
                                           slave_write));
index bffa6f345f2f40e35ebab8e546237da4fbe6b6a8..5428212612c0fbe1cb3141673e9dc91594fd69c8 100644 (file)
@@ -1942,6 +1942,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev)
                               (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
                if (!offline_bit)
                        return 0;
+
+               /* If device removal has been requested,
+                * do not continue retrying.
+                */
+               if (dev->persist->interface_state &
+                   MLX4_INTERFACE_STATE_NOWAIT)
+                       break;
+
                /* There are cases as part of AER/Reset flow that PF needs
                 * around 100 msec to load. We therefore sleep for 100 msec
                 * to allow other tasks to make use of that CPU during this
@@ -3956,6 +3964,9 @@ static void mlx4_remove_one(struct pci_dev *pdev)
        struct devlink *devlink = priv_to_devlink(priv);
        int active_vfs = 0;
 
+       if (mlx4_is_slave(dev))
+               persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;
+
        mutex_lock(&persist->interface_state_mutex);
        persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
        mutex_unlock(&persist->interface_state_mutex);
index 6533c16e27ad7fb03926286ec94055d26b26f615..b8d9db3681f91d94189b3bed3aa73263a31eaad1 100644 (file)
@@ -476,6 +476,7 @@ enum {
 enum {
        MLX4_INTERFACE_STATE_UP         = 1 << 0,
        MLX4_INTERFACE_STATE_DELETION   = 1 << 1,
+       MLX4_INTERFACE_STATE_NOWAIT     = 1 << 2,
 };
 
 #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \