]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/commitdiff
IB/core: Let IB core distribute cache update events
authorParav Pandit <parav@mellanox.com>
Thu, 12 Dec 2019 11:30:22 +0000 (13:30 +0200)
committerPaolo Pisati <paolo.pisati@canonical.com>
Mon, 24 Feb 2020 15:19:28 +0000 (16:19 +0100)
BugLink: https://bugs.launchpad.net/bugs/1864488
[ Upstream commit 6b57cea9221b0247ad5111b348522625e489a8e4 ]

Currently when the low level driver notifies Pkey, GID, and port change
events they are notified to the registered handlers in the order they are
registered.

IB core and other ULPs such as IPoIB are interested in GID, LID, Pkey
change events.

Since all GID queries done by ULPs are serviced by IB core, and the IB
core deferes cache updates to a work queue, it is possible for other
clients to see stale cache data when they handle their own events.

For example, the below call tree shows how ipoib will call
rdma_query_gid() concurrently with the update to the cache sitting in the
WQ.

mlx5_ib_handle_event()
  ib_dispatch_event()
    ib_cache_event()
       queue_work() -> slow cache update

    [..]
    ipoib_event()
     queue_work()
       [..]
       work handler
         ipoib_ib_dev_flush_light()
           __ipoib_ib_dev_flush()
              ipoib_dev_addr_changed_valid()
                rdma_query_gid() <- Returns old GID, cache not updated.

Move all the event dispatch to a work queue so that the cache update is
always done before any clients are notified.

Fixes: f35faa4ba956 ("IB/core: Simplify ib_query_gid to always refer to cache")
Link: https://lore.kernel.org/r/20191212113024.336702-3-leon@kernel.org
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Paolo Pisati <paolo.pisati@canonical.com>
drivers/infiniband/core/cache.c
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/device.c
include/rdma/ib_verbs.h

index 00fb3eacda1944d780cc5810624c6ae38a881601..65b10efca2b8cbcbdaac2c1828ab7dcc059a6428 100644 (file)
@@ -51,9 +51,8 @@ struct ib_pkey_cache {
 
 struct ib_update_work {
        struct work_struct work;
-       struct ib_device  *device;
-       u8                 port_num;
-       bool               enforce_security;
+       struct ib_event event;
+       bool enforce_security;
 };
 
 union ib_gid zgid;
@@ -130,7 +129,7 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
        event.element.port_num  = port;
        event.event             = IB_EVENT_GID_CHANGE;
 
-       ib_dispatch_event(&event);
+       ib_dispatch_event_clients(&event);
 }
 
 static const char * const gid_type_str[] = {
@@ -1387,9 +1386,8 @@ err:
        return ret;
 }
 
-static void ib_cache_update(struct ib_device *device,
-                           u8                port,
-                           bool              enforce_security)
+static int
+ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
 {
        struct ib_port_attr       *tprops = NULL;
        struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
@@ -1397,11 +1395,11 @@ static void ib_cache_update(struct ib_device *device,
        int                        ret;
 
        if (!rdma_is_port_valid(device, port))
-               return;
+               return -EINVAL;
 
        tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
        if (!tprops)
-               return;
+               return -ENOMEM;
 
        ret = ib_query_port(device, port, tprops);
        if (ret) {
@@ -1419,8 +1417,10 @@ static void ib_cache_update(struct ib_device *device,
        pkey_cache = kmalloc(struct_size(pkey_cache, table,
                                         tprops->pkey_tbl_len),
                             GFP_KERNEL);
-       if (!pkey_cache)
+       if (!pkey_cache) {
+               ret = -ENOMEM;
                goto err;
+       }
 
        pkey_cache->table_len = tprops->pkey_tbl_len;
 
@@ -1452,50 +1452,84 @@ static void ib_cache_update(struct ib_device *device,
 
        kfree(old_pkey_cache);
        kfree(tprops);
-       return;
+       return 0;
 
 err:
        kfree(pkey_cache);
        kfree(tprops);
+       return ret;
+}
+
+static void ib_cache_event_task(struct work_struct *_work)
+{
+       struct ib_update_work *work =
+               container_of(_work, struct ib_update_work, work);
+       int ret;
+
+       /* Before distributing the cache update event, first sync
+        * the cache.
+        */
+       ret = ib_cache_update(work->event.device, work->event.element.port_num,
+                             work->enforce_security);
+
+       /* GID event is notified already for individual GID entries by
+        * dispatch_gid_change_event(). Hence, notifiy for rest of the
+        * events.
+        */
+       if (!ret && work->event.event != IB_EVENT_GID_CHANGE)
+               ib_dispatch_event_clients(&work->event);
+
+       kfree(work);
 }
 
-static void ib_cache_task(struct work_struct *_work)
+static void ib_generic_event_task(struct work_struct *_work)
 {
        struct ib_update_work *work =
                container_of(_work, struct ib_update_work, work);
 
-       ib_cache_update(work->device,
-                       work->port_num,
-                       work->enforce_security);
+       ib_dispatch_event_clients(&work->event);
        kfree(work);
 }
 
-static void ib_cache_event(struct ib_event_handler *handler,
-                          struct ib_event *event)
+static bool is_cache_update_event(const struct ib_event *event)
+{
+       return (event->event == IB_EVENT_PORT_ERR    ||
+               event->event == IB_EVENT_PORT_ACTIVE ||
+               event->event == IB_EVENT_LID_CHANGE  ||
+               event->event == IB_EVENT_PKEY_CHANGE ||
+               event->event == IB_EVENT_CLIENT_REREGISTER ||
+               event->event == IB_EVENT_GID_CHANGE);
+}
+
+/**
+ * ib_dispatch_event - Dispatch an asynchronous event
+ * @event:Event to dispatch
+ *
+ * Low-level drivers must call ib_dispatch_event() to dispatch the
+ * event to all registered event handlers when an asynchronous event
+ * occurs.
+ */
+void ib_dispatch_event(const struct ib_event *event)
 {
        struct ib_update_work *work;
 
-       if (event->event == IB_EVENT_PORT_ERR    ||
-           event->event == IB_EVENT_PORT_ACTIVE ||
-           event->event == IB_EVENT_LID_CHANGE  ||
-           event->event == IB_EVENT_PKEY_CHANGE ||
-           event->event == IB_EVENT_CLIENT_REREGISTER ||
-           event->event == IB_EVENT_GID_CHANGE) {
-               work = kmalloc(sizeof *work, GFP_ATOMIC);
-               if (work) {
-                       INIT_WORK(&work->work, ib_cache_task);
-                       work->device   = event->device;
-                       work->port_num = event->element.port_num;
-                       if (event->event == IB_EVENT_PKEY_CHANGE ||
-                           event->event == IB_EVENT_GID_CHANGE)
-                               work->enforce_security = true;
-                       else
-                               work->enforce_security = false;
-
-                       queue_work(ib_wq, &work->work);
-               }
-       }
+       work = kzalloc(sizeof(*work), GFP_ATOMIC);
+       if (!work)
+               return;
+
+       if (is_cache_update_event(event))
+               INIT_WORK(&work->work, ib_cache_event_task);
+       else
+               INIT_WORK(&work->work, ib_generic_event_task);
+
+       work->event = *event;
+       if (event->event == IB_EVENT_PKEY_CHANGE ||
+           event->event == IB_EVENT_GID_CHANGE)
+               work->enforce_security = true;
+
+       queue_work(ib_wq, &work->work);
 }
+EXPORT_SYMBOL(ib_dispatch_event);
 
 int ib_cache_setup_one(struct ib_device *device)
 {
@@ -1511,9 +1545,6 @@ int ib_cache_setup_one(struct ib_device *device)
        rdma_for_each_port (device, p)
                ib_cache_update(device, p, true);
 
-       INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
-                             device, ib_cache_event);
-       ib_register_event_handler(&device->cache.event_handler);
        return 0;
 }
 
@@ -1535,14 +1566,12 @@ void ib_cache_release_one(struct ib_device *device)
 
 void ib_cache_cleanup_one(struct ib_device *device)
 {
-       /* The cleanup function unregisters the event handler,
-        * waits for all in-progress workqueue elements and cleans
-        * up the GID cache. This function should be called after
-        * the device was removed from the devices list and all
-        * clients were removed, so the cache exists but is
+       /* The cleanup function waits for all in-progress workqueue
+        * elements and cleans up the GID cache. This function should be
+        * called after the device was removed from the devices list and
+        * all clients were removed, so the cache exists but is
         * non-functional and shouldn't be updated anymore.
         */
-       ib_unregister_event_handler(&device->cache.event_handler);
        flush_workqueue(ib_wq);
        gid_table_cleanup_one(device);
 
index 9d07378b5b4230f6e5f1568f3e6b428e95f7b98a..9b30773f2da05fba4e1eb52a8de3ec133b44f139 100644 (file)
@@ -149,6 +149,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
 int ib_cache_setup_one(struct ib_device *device);
 void ib_cache_cleanup_one(struct ib_device *device);
 void ib_cache_release_one(struct ib_device *device);
+void ib_dispatch_event_clients(struct ib_event *event);
 
 #ifdef CONFIG_CGROUP_RDMA
 void ib_device_register_rdmacg(struct ib_device *device);
index 2b5bd7206fc6e13191a5ec8a97af1f25267c7c54..2a770b8dca0036d5ede5043b78ff6fe11bf26f68 100644 (file)
@@ -591,6 +591,7 @@ struct ib_device *_ib_alloc_device(size_t size)
 
        INIT_LIST_HEAD(&device->event_handler_list);
        spin_lock_init(&device->event_handler_lock);
+       init_rwsem(&device->event_handler_rwsem);
        mutex_init(&device->unregistration_lock);
        /*
         * client_data needs to be alloc because we don't want our mark to be
@@ -1932,17 +1933,15 @@ EXPORT_SYMBOL(ib_set_client_data);
  *
  * ib_register_event_handler() registers an event handler that will be
  * called back when asynchronous IB events occur (as defined in
- * chapter 11 of the InfiniBand Architecture Specification).  This
- * callback may occur in interrupt context.
+ * chapter 11 of the InfiniBand Architecture Specification). This
+ * callback occurs in workqueue context.
  */
 void ib_register_event_handler(struct ib_event_handler *event_handler)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+       down_write(&event_handler->device->event_handler_rwsem);
        list_add_tail(&event_handler->list,
                      &event_handler->device->event_handler_list);
-       spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+       up_write(&event_handler->device->event_handler_rwsem);
 }
 EXPORT_SYMBOL(ib_register_event_handler);
 
@@ -1955,35 +1954,23 @@ EXPORT_SYMBOL(ib_register_event_handler);
  */
 void ib_unregister_event_handler(struct ib_event_handler *event_handler)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+       down_write(&event_handler->device->event_handler_rwsem);
        list_del(&event_handler->list);
-       spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+       up_write(&event_handler->device->event_handler_rwsem);
 }
 EXPORT_SYMBOL(ib_unregister_event_handler);
 
-/**
- * ib_dispatch_event - Dispatch an asynchronous event
- * @event:Event to dispatch
- *
- * Low-level drivers must call ib_dispatch_event() to dispatch the
- * event to all registered event handlers when an asynchronous event
- * occurs.
- */
-void ib_dispatch_event(struct ib_event *event)
+void ib_dispatch_event_clients(struct ib_event *event)
 {
-       unsigned long flags;
        struct ib_event_handler *handler;
 
-       spin_lock_irqsave(&event->device->event_handler_lock, flags);
+       down_read(&event->device->event_handler_rwsem);
 
        list_for_each_entry(handler, &event->device->event_handler_list, list)
                handler->handler(handler, event);
 
-       spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
+       up_read(&event->device->event_handler_rwsem);
 }
-EXPORT_SYMBOL(ib_dispatch_event);
 
 static int iw_query_port(struct ib_device *device,
                           u8 port_num,
index 75c7b5ed53c595fe9372b16784d10456094718cb..30d50528d710ebf8243de9e2e18a7d1b0a2e7b6d 100644 (file)
@@ -2146,7 +2146,6 @@ struct ib_port_cache {
 
 struct ib_cache {
        rwlock_t                lock;
-       struct ib_event_handler event_handler;
 };
 
 struct ib_port_immutable {
@@ -2590,7 +2589,11 @@ struct ib_device {
        struct rcu_head rcu_head;
 
        struct list_head              event_handler_list;
-       spinlock_t                    event_handler_lock;
+       /* Protects event_handler_list */
+       struct rw_semaphore event_handler_rwsem;
+
+       /* Protects QP's event_handler calls and open_qp list */
+       spinlock_t event_handler_lock;
 
        struct rw_semaphore           client_data_rwsem;
        struct xarray                 client_data;
@@ -2897,7 +2900,7 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
 
 void ib_register_event_handler(struct ib_event_handler *event_handler);
 void ib_unregister_event_handler(struct ib_event_handler *event_handler);
-void ib_dispatch_event(struct ib_event *event);
+void ib_dispatch_event(const struct ib_event *event);
 
 int ib_query_port(struct ib_device *device,
                  u8 port_num, struct ib_port_attr *port_attr);