git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git / commitdiff
UBUNTU: SAUCE: RDMA/core: Introduce peer memory interface
author    Jason Gunthorpe <jgg@nvidia.com>
          Thu, 8 Apr 2021 14:37:58 +0000 (08:37 -0600)
committer Seth Forshee <seth.forshee@canonical.com>
          Thu, 8 Apr 2021 20:42:54 +0000 (15:42 -0500)
BugLink: https://bugs.launchpad.net/bugs/1923104
The peer_memory_client scheme allows a driver to register with the ib_umem
system that it has the ability to understand user virtual address ranges
that are not compatible with get_user_pages(). For instance, VMAs created
with io_remap_pfn_range(), or other driver-special VMAs.

For ranges the interface understands, it can provide a DMA mapped sg_table
for use by the ib_umem, allowing user virtual ranges that cannot be
supported by get_user_pages() to be used as umems for RDMA.

This is designed to preserve the kABI; no functions or structures are
changed, only new symbols are added:

 ib_register_peer_memory_client
 ib_unregister_peer_memory_client
 ib_umem_activate_invalidation_notifier
 ib_umem_get_peer

And a bitfield in struct ib_umem uses more bits.

This interface is compatible with the two out-of-tree GPU drivers:
 https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/master/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c
 https://github.com/Mellanox/nv_peer_memory/blob/master/nv_peer_mem.c

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
(cherry picked from commit a42989294cf39d6e829424734ab0e7ec48bebcef
 git://git.kernel.org/pub/scm/linux/kernel/git/leon/linux-rdma.git)
Signed-off-by: dann frazier <dann.frazier@canonical.com>
Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
12 files changed:
drivers/infiniband/core/Makefile
drivers/infiniband/core/ib_peer_mem.h [new file with mode: 0644]
drivers/infiniband/core/peer_mem.c [new file with mode: 0644]
drivers/infiniband/core/umem.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/devx.c
drivers/infiniband/hw/mlx5/doorbell.c
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/srq.c
include/rdma/ib_umem.h
include/rdma/peer_mem.h [new file with mode: 0644]
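
Before the per-file diffs, here is a minimal consumer-side sketch of the two umem-facing symbols listed above, ib_umem_get_peer() and ib_umem_activate_invalidation_notifier(); it mirrors the mlx5 hunks further down. The my_pin()/my_invalidate() names and bodies are illustrative only, not part of the patch.

/* Hypothetical RDMA driver code pinning a user buffer via the peer path. */
#include <linux/err.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>

static void my_invalidate(struct ib_umem *umem, void *priv)
{
        /*
         * Called by ib_core when the peer revokes the memory: stop HW
         * access to this umem; the DMA mapping is torn down after this
         * callback returns.
         */
}

static struct ib_umem *my_pin(struct ib_device *dev, unsigned long addr,
                              size_t len, int access, void *priv)
{
        struct ib_umem *umem;

        /* Falls back to peer memory clients if get_user_pages() cannot pin */
        umem = ib_umem_get_peer(dev, addr, len, access,
                                IB_PEER_MEM_INVAL_SUPP);
        if (IS_ERR(umem))
                return umem;

        /* Required when the backing peer demands invalidation support */
        if (umem->is_peer)
                ib_umem_activate_invalidation_notifier(umem, my_invalidate,
                                                       priv);
        return umem;
}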

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index ccf2670ef45efe83c4b1d88806ff23e4de2add63..f72db653d443063dc5db820fee275a569ac6b2f0 100644
@@ -40,5 +40,5 @@ ib_uverbs-y :=                        uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
                                uverbs_std_types_srq.o \
                                uverbs_std_types_wq.o \
                                uverbs_std_types_qp.o
-ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
+ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o peer_mem.o
 ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/ib_peer_mem.h b/drivers/infiniband/core/ib_peer_mem.h
new file mode 100644
index 0000000..684bcb8
--- /dev/null
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2014-2020,  Mellanox Technologies. All rights reserved.
+ */
+#ifndef RDMA_IB_PEER_MEM_H
+#define RDMA_IB_PEER_MEM_H
+
+#include <rdma/peer_mem.h>
+#include <linux/kobject.h>
+#include <linux/xarray.h>
+#include <rdma/ib_umem.h>
+
+struct ib_peer_memory_statistics {
+       atomic64_t num_alloc_mrs;
+       atomic64_t num_dealloc_mrs;
+       atomic64_t num_reg_pages;
+       atomic64_t num_dereg_pages;
+       atomic64_t num_reg_bytes;
+       atomic64_t num_dereg_bytes;
+       unsigned long num_free_callbacks;
+};
+
+struct ib_peer_memory_client {
+       struct kobject kobj;
+       refcount_t usecnt;
+       struct completion usecnt_zero;
+       const struct peer_memory_client *peer_mem;
+       struct list_head core_peer_list;
+       struct ib_peer_memory_statistics stats;
+       struct xarray umem_xa;
+       u32 xa_cyclic_next;
+       bool invalidation_required;
+};
+
+struct ib_umem_peer {
+       struct ib_umem umem;
+       struct kref kref;
+       /* peer memory that manages this umem */
+       struct ib_peer_memory_client *ib_peer_client;
+       void *peer_client_context;
+       umem_invalidate_func_t invalidation_func;
+       void *invalidation_private;
+       struct mutex mapping_lock;
+       bool mapped;
+       u32 xa_id;
+       struct scatterlist *first_sg;
+       dma_addr_t first_dma_address;
+       unsigned int first_dma_length;
+       unsigned int first_length;
+       struct scatterlist *last_sg;
+       unsigned int last_dma_length;
+       unsigned int last_length;
+};
+
+struct ib_umem *ib_peer_umem_get(struct ib_umem *old_umem, int old_ret,
+                                unsigned long peer_mem_flags);
+void ib_peer_umem_release(struct ib_umem *umem);
+
+#endif
diff --git a/drivers/infiniband/core/peer_mem.c b/drivers/infiniband/core/peer_mem.c
new file mode 100644
index 0000000..f01bf92
--- /dev/null
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2014-2020,  Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <linux/sched/mm.h>
+#include "ib_peer_mem.h"
+
+static DEFINE_MUTEX(peer_memory_mutex);
+static LIST_HEAD(peer_memory_list);
+static struct kobject *peers_kobj;
+#define PEER_NO_INVALIDATION_ID U32_MAX
+
+static int ib_invalidate_peer_memory(void *reg_handle, u64 core_context);
+
+struct peer_mem_attribute {
+       struct attribute attr;
+       ssize_t (*show)(struct ib_peer_memory_client *ib_peer_client,
+                       struct peer_mem_attribute *attr, char *buf);
+       ssize_t (*store)(struct ib_peer_memory_client *ib_peer_client,
+                        struct peer_mem_attribute *attr, const char *buf,
+                        size_t count);
+};
+#define PEER_ATTR_RO(_name)                                                    \
+       struct peer_mem_attribute peer_attr_ ## _name = __ATTR_RO(_name)
+
+static ssize_t version_show(struct ib_peer_memory_client *ib_peer_client,
+                           struct peer_mem_attribute *attr, char *buf)
+{
+       return scnprintf(buf, PAGE_SIZE, "%s\n",
+                        ib_peer_client->peer_mem->version);
+}
+static PEER_ATTR_RO(version);
+
+static ssize_t num_alloc_mrs_show(struct ib_peer_memory_client *ib_peer_client,
+                                 struct peer_mem_attribute *attr, char *buf)
+{
+       return scnprintf(
+               buf, PAGE_SIZE, "%llu\n",
+               (u64)atomic64_read(&ib_peer_client->stats.num_alloc_mrs));
+}
+static PEER_ATTR_RO(num_alloc_mrs);
+
+static ssize_t
+num_dealloc_mrs_show(struct ib_peer_memory_client *ib_peer_client,
+                    struct peer_mem_attribute *attr, char *buf)
+
+{
+       return scnprintf(
+               buf, PAGE_SIZE, "%llu\n",
+               (u64)atomic64_read(&ib_peer_client->stats.num_dealloc_mrs));
+}
+static PEER_ATTR_RO(num_dealloc_mrs);
+
+static ssize_t num_reg_pages_show(struct ib_peer_memory_client *ib_peer_client,
+                                 struct peer_mem_attribute *attr, char *buf)
+{
+       return scnprintf(
+               buf, PAGE_SIZE, "%llu\n",
+               (u64)atomic64_read(&ib_peer_client->stats.num_reg_pages));
+}
+static PEER_ATTR_RO(num_reg_pages);
+
+static ssize_t
+num_dereg_pages_show(struct ib_peer_memory_client *ib_peer_client,
+                    struct peer_mem_attribute *attr, char *buf)
+{
+       return scnprintf(
+               buf, PAGE_SIZE, "%llu\n",
+               (u64)atomic64_read(&ib_peer_client->stats.num_dereg_pages));
+}
+static PEER_ATTR_RO(num_dereg_pages);
+
+static ssize_t num_reg_bytes_show(struct ib_peer_memory_client *ib_peer_client,
+                                 struct peer_mem_attribute *attr, char *buf)
+{
+       return scnprintf(
+               buf, PAGE_SIZE, "%llu\n",
+               (u64)atomic64_read(&ib_peer_client->stats.num_reg_bytes));
+}
+static PEER_ATTR_RO(num_reg_bytes);
+
+static ssize_t
+num_dereg_bytes_show(struct ib_peer_memory_client *ib_peer_client,
+                    struct peer_mem_attribute *attr, char *buf)
+{
+       return scnprintf(
+               buf, PAGE_SIZE, "%llu\n",
+               (u64)atomic64_read(&ib_peer_client->stats.num_dereg_bytes));
+}
+static PEER_ATTR_RO(num_dereg_bytes);
+
+static ssize_t
+num_free_callbacks_show(struct ib_peer_memory_client *ib_peer_client,
+                       struct peer_mem_attribute *attr, char *buf)
+{
+       return scnprintf(buf, PAGE_SIZE, "%lu\n",
+                        ib_peer_client->stats.num_free_callbacks);
+}
+static PEER_ATTR_RO(num_free_callbacks);
+
+static struct attribute *peer_mem_attrs[] = {
+                       &peer_attr_version.attr,
+                       &peer_attr_num_alloc_mrs.attr,
+                       &peer_attr_num_dealloc_mrs.attr,
+                       &peer_attr_num_reg_pages.attr,
+                       &peer_attr_num_dereg_pages.attr,
+                       &peer_attr_num_reg_bytes.attr,
+                       &peer_attr_num_dereg_bytes.attr,
+                       &peer_attr_num_free_callbacks.attr,
+                       NULL,
+};
+
+static const struct attribute_group peer_mem_attr_group = {
+       .attrs = peer_mem_attrs,
+};
+
+static ssize_t peer_attr_show(struct kobject *kobj, struct attribute *attr,
+                             char *buf)
+{
+       struct peer_mem_attribute *peer_attr =
+               container_of(attr, struct peer_mem_attribute, attr);
+
+       if (!peer_attr->show)
+               return -EIO;
+       return peer_attr->show(container_of(kobj, struct ib_peer_memory_client,
+                                           kobj),
+                              peer_attr, buf);
+}
+
+static const struct sysfs_ops peer_mem_sysfs_ops = {
+       .show = peer_attr_show,
+};
+
+static void ib_peer_memory_client_release(struct kobject *kobj)
+{
+       struct ib_peer_memory_client *ib_peer_client =
+               container_of(kobj, struct ib_peer_memory_client, kobj);
+
+       kfree(ib_peer_client);
+}
+
+static struct kobj_type peer_mem_type = {
+       .sysfs_ops = &peer_mem_sysfs_ops,
+       .release = ib_peer_memory_client_release,
+};
+
+static int ib_memory_peer_check_mandatory(const struct peer_memory_client
+                                                    *peer_client)
+{
+#define PEER_MEM_MANDATORY_FUNC(x) {offsetof(struct peer_memory_client, x), #x}
+       int i;
+       static const struct {
+               size_t offset;
+               char *name;
+       } mandatory_table[] = {
+               PEER_MEM_MANDATORY_FUNC(acquire),
+               PEER_MEM_MANDATORY_FUNC(get_pages),
+               PEER_MEM_MANDATORY_FUNC(put_pages),
+               PEER_MEM_MANDATORY_FUNC(dma_map),
+               PEER_MEM_MANDATORY_FUNC(dma_unmap),
+       };
+
+       for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
+               if (!*(void **)((void *)peer_client +
+                               mandatory_table[i].offset)) {
+                       pr_err("Peer memory %s is missing mandatory function %s\n",
+                              peer_client->name, mandatory_table[i].name);
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+void *
+ib_register_peer_memory_client(const struct peer_memory_client *peer_client,
+                              invalidate_peer_memory *invalidate_callback)
+{
+       struct ib_peer_memory_client *ib_peer_client;
+       int ret;
+
+       if (ib_memory_peer_check_mandatory(peer_client))
+               return NULL;
+
+       ib_peer_client = kzalloc(sizeof(*ib_peer_client), GFP_KERNEL);
+       if (!ib_peer_client)
+               return NULL;
+       kobject_init(&ib_peer_client->kobj, &peer_mem_type);
+       refcount_set(&ib_peer_client->usecnt, 1);
+       init_completion(&ib_peer_client->usecnt_zero);
+       ib_peer_client->peer_mem = peer_client;
+       xa_init_flags(&ib_peer_client->umem_xa, XA_FLAGS_ALLOC);
+
+       /*
+        * If the peer wants the invalidation_callback then all memory users
+        * linked to that peer must support invalidation.
+        */
+       if (invalidate_callback) {
+               *invalidate_callback = ib_invalidate_peer_memory;
+               ib_peer_client->invalidation_required = true;
+       }
+
+       mutex_lock(&peer_memory_mutex);
+       if (!peers_kobj) {
+               /* Created under /sys/kernel/mm */
+               peers_kobj = kobject_create_and_add("memory_peers", mm_kobj);
+               if (!peers_kobj)
+                       goto err_unlock;
+       }
+
+       ret = kobject_add(&ib_peer_client->kobj, peers_kobj, peer_client->name);
+       if (ret)
+               goto err_parent;
+
+       ret = sysfs_create_group(&ib_peer_client->kobj,
+                                &peer_mem_attr_group);
+       if (ret)
+               goto err_parent;
+       list_add_tail(&ib_peer_client->core_peer_list, &peer_memory_list);
+       mutex_unlock(&peer_memory_mutex);
+       return ib_peer_client;
+
+err_parent:
+       if (list_empty(&peer_memory_list)) {
+               kobject_put(peers_kobj);
+               peers_kobj = NULL;
+       }
+err_unlock:
+       mutex_unlock(&peer_memory_mutex);
+       kobject_put(&ib_peer_client->kobj);
+       return NULL;
+}
+EXPORT_SYMBOL(ib_register_peer_memory_client);
+
+void ib_unregister_peer_memory_client(void *reg_handle)
+{
+       struct ib_peer_memory_client *ib_peer_client = reg_handle;
+
+       mutex_lock(&peer_memory_mutex);
+       list_del(&ib_peer_client->core_peer_list);
+       if (list_empty(&peer_memory_list)) {
+               kobject_put(peers_kobj);
+               peers_kobj = NULL;
+       }
+       mutex_unlock(&peer_memory_mutex);
+
+       /*
+        * Wait for all umems to be destroyed before returning. Once
+        * ib_unregister_peer_memory_client() returns no umems will call any
+        * peer_mem ops.
+        */
+       if (refcount_dec_and_test(&ib_peer_client->usecnt))
+               complete(&ib_peer_client->usecnt_zero);
+       wait_for_completion(&ib_peer_client->usecnt_zero);
+
+       kobject_put(&ib_peer_client->kobj);
+}
+EXPORT_SYMBOL(ib_unregister_peer_memory_client);
+
+static struct ib_peer_memory_client *
+ib_get_peer_client(unsigned long addr, size_t size,
+                  unsigned long peer_mem_flags, void **peer_client_context)
+{
+       struct ib_peer_memory_client *ib_peer_client;
+       int ret = 0;
+
+       mutex_lock(&peer_memory_mutex);
+       list_for_each_entry(ib_peer_client, &peer_memory_list,
+                           core_peer_list) {
+               if (ib_peer_client->invalidation_required &&
+                   (!(peer_mem_flags & IB_PEER_MEM_INVAL_SUPP)))
+                       continue;
+               ret = ib_peer_client->peer_mem->acquire(addr, size, NULL, NULL,
+                                                       peer_client_context);
+               if (ret > 0) {
+                       refcount_inc(&ib_peer_client->usecnt);
+                       mutex_unlock(&peer_memory_mutex);
+                       return ib_peer_client;
+               }
+       }
+       mutex_unlock(&peer_memory_mutex);
+       return NULL;
+}
+
+static void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client,
+                              void *peer_client_context)
+{
+       if (ib_peer_client->peer_mem->release)
+               ib_peer_client->peer_mem->release(peer_client_context);
+       if (refcount_dec_and_test(&ib_peer_client->usecnt))
+               complete(&ib_peer_client->usecnt_zero);
+}
+
+static void ib_peer_umem_kref_release(struct kref *kref)
+{
+       kfree(container_of(kref, struct ib_umem_peer, kref));
+}
+
+static void ib_unmap_peer_client(struct ib_umem_peer *umem_p)
+{
+       struct ib_peer_memory_client *ib_peer_client = umem_p->ib_peer_client;
+       const struct peer_memory_client *peer_mem = ib_peer_client->peer_mem;
+       struct ib_umem *umem = &umem_p->umem;
+
+       lockdep_assert_held(&umem_p->mapping_lock);
+
+       if (umem_p->last_sg) {
+               umem_p->last_sg->length = umem_p->last_length;
+               sg_dma_len(umem_p->last_sg) = umem_p->last_dma_length;
+       }
+
+       if (umem_p->first_sg) {
+               umem_p->first_sg->dma_address = umem_p->first_dma_address;
+               umem_p->first_sg->length = umem_p->first_length;
+               sg_dma_len(umem_p->first_sg) = umem_p->first_dma_length;
+       }
+
+       peer_mem->dma_unmap(&umem_p->umem.sg_head, umem_p->peer_client_context,
+                           umem_p->umem.ibdev->dma_device);
+       peer_mem->put_pages(&umem_p->umem.sg_head, umem_p->peer_client_context);
+       memset(&umem->sg_head, 0, sizeof(umem->sg_head));
+
+       atomic64_add(umem->nmap, &ib_peer_client->stats.num_dereg_pages);
+       atomic64_add(umem->length, &ib_peer_client->stats.num_dereg_bytes);
+       atomic64_inc(&ib_peer_client->stats.num_dealloc_mrs);
+       if (umem_p->xa_id != PEER_NO_INVALIDATION_ID)
+               xa_store(&ib_peer_client->umem_xa, umem_p->xa_id, NULL,
+                        GFP_KERNEL);
+       umem_p->mapped = false;
+}
+
+static int ib_invalidate_peer_memory(void *reg_handle, u64 core_context)
+{
+       struct ib_peer_memory_client *ib_peer_client = reg_handle;
+       struct ib_umem_peer *umem_p;
+
+       /*
+        * The client is not required to fence against invalidation during
+        * put_pages() as that would deadlock when we call put_pages() here.
+        * Thus the core_context cannot be a umem pointer as we have no control
+        * over the lifetime. Since we won't change the kABI for this to add a
+        * proper kref, an xarray is used.
+        */
+       xa_lock(&ib_peer_client->umem_xa);
+       ib_peer_client->stats.num_free_callbacks += 1;
+       umem_p = xa_load(&ib_peer_client->umem_xa, core_context);
+       if (!umem_p)
+               goto out_unlock;
+       kref_get(&umem_p->kref);
+       xa_unlock(&ib_peer_client->umem_xa);
+
+       mutex_lock(&umem_p->mapping_lock);
+       if (umem_p->mapped) {
+               /*
+                * At this point the invalidation_func must be !NULL as the get
+                * flow does not unlock mapping_lock until it is set, and umems
+                * that do not require invalidation are not in the xarray.
+                */
+               umem_p->invalidation_func(&umem_p->umem,
+                                         umem_p->invalidation_private);
+               ib_unmap_peer_client(umem_p);
+       }
+       mutex_unlock(&umem_p->mapping_lock);
+       kref_put(&umem_p->kref, ib_peer_umem_kref_release);
+       return 0;
+
+out_unlock:
+       xa_unlock(&ib_peer_client->umem_xa);
+       return 0;
+}
+
+void ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
+                                           umem_invalidate_func_t func,
+                                           void *priv)
+{
+       struct ib_umem_peer *umem_p =
+               container_of(umem, struct ib_umem_peer, umem);
+
+       if (WARN_ON(!umem->is_peer))
+               return;
+       if (umem_p->xa_id == PEER_NO_INVALIDATION_ID)
+               return;
+
+       umem_p->invalidation_func = func;
+       umem_p->invalidation_private = priv;
+       /* Pairs with the lock in ib_peer_umem_get() */
+       mutex_unlock(&umem_p->mapping_lock);
+
+       /* At this point func can be called asynchronously */
+}
+EXPORT_SYMBOL(ib_umem_activate_invalidation_notifier);
+
+static void fix_peer_sgls(struct ib_umem_peer *umem_p, unsigned long peer_page_size)
+{
+       struct ib_umem *umem = &umem_p->umem;
+       struct scatterlist *sg;
+       int i;
+
+       for_each_sg(umem_p->umem.sg_head.sgl, sg, umem_p->umem.nmap, i) {
+               if (i == 0) {
+                       unsigned long offset;
+
+                       umem_p->first_sg = sg;
+                       umem_p->first_dma_address = sg->dma_address;
+                       umem_p->first_dma_length = sg_dma_len(sg);
+                       umem_p->first_length = sg->length;
+
+                       offset = ALIGN_DOWN(umem->address, PAGE_SIZE) -
+                                ALIGN_DOWN(umem->address, peer_page_size);
+                       sg->dma_address += offset;
+                       sg_dma_len(sg) -= offset;
+                       sg->length -= offset;
+               }
+
+               if (i == umem_p->umem.nmap - 1) {
+                       unsigned long trim;
+
+                       umem_p->last_sg = sg;
+                       umem_p->last_dma_length = sg_dma_len(sg);
+                       umem_p->last_length = sg->length;
+
+                       trim = ALIGN(umem->address + umem->length,
+                                    peer_page_size) -
+                              ALIGN(umem->address + umem->length, PAGE_SIZE);
+                       sg_dma_len(sg) -= trim;
+                       sg->length -= trim;
+               }
+       }
+}
+
+struct ib_umem *ib_peer_umem_get(struct ib_umem *old_umem, int old_ret,
+                                unsigned long peer_mem_flags)
+{
+       struct ib_peer_memory_client *ib_peer_client;
+       unsigned long peer_page_size;
+       void *peer_client_context;
+       struct ib_umem_peer *umem_p;
+       int ret;
+
+       ib_peer_client =
+               ib_get_peer_client(old_umem->address, old_umem->length,
+                                  peer_mem_flags, &peer_client_context);
+       if (!ib_peer_client)
+               return ERR_PTR(old_ret);
+
+       umem_p = kzalloc(sizeof(*umem_p), GFP_KERNEL);
+       if (!umem_p) {
+               ret = -ENOMEM;
+               goto err_client;
+       }
+
+       kref_init(&umem_p->kref);
+       umem_p->umem = *old_umem;
+       memset(&umem_p->umem.sg_head, 0, sizeof(umem_p->umem.sg_head));
+       umem_p->umem.is_peer = 1;
+       umem_p->ib_peer_client = ib_peer_client;
+       umem_p->peer_client_context = peer_client_context;
+       mutex_init(&umem_p->mapping_lock);
+       umem_p->xa_id = PEER_NO_INVALIDATION_ID;
+
+       mutex_lock(&umem_p->mapping_lock);
+       if (ib_peer_client->invalidation_required) {
+               ret = xa_alloc_cyclic(&ib_peer_client->umem_xa, &umem_p->xa_id,
+                                     umem_p,
+                                     XA_LIMIT(0, PEER_NO_INVALIDATION_ID - 1),
+                                     &ib_peer_client->xa_cyclic_next,
+                                     GFP_KERNEL);
+               if (ret < 0)
+                       goto err_umem;
+       }
+
+       /*
+        * We always request write permissions to the pages, to force breaking
+        * of any CoW during the registration of the MR. For read-only MRs we
+        * use the "force" flag to indicate that CoW breaking is required but
+        * the registration should not fail if referencing read-only areas.
+        */
+       ret = ib_peer_client->peer_mem->get_pages(umem_p->umem.address,
+                                                 umem_p->umem.length, 1,
+                                                 !umem_p->umem.writable, NULL,
+                                                 peer_client_context,
+                                                 umem_p->xa_id);
+       if (ret)
+               goto err_xa;
+
+       ret = ib_peer_client->peer_mem->dma_map(&umem_p->umem.sg_head,
+                                               peer_client_context,
+                                               umem_p->umem.ibdev->dma_device,
+                                               0, &umem_p->umem.nmap);
+       if (ret)
+               goto err_pages;
+
+       peer_page_size = ib_peer_client->peer_mem->get_page_size(peer_client_context);
+       if (peer_page_size != PAGE_SIZE)
+               fix_peer_sgls(umem_p, peer_page_size);
+
+       umem_p->mapped = true;
+       atomic64_add(umem_p->umem.nmap, &ib_peer_client->stats.num_reg_pages);
+       atomic64_add(umem_p->umem.length, &ib_peer_client->stats.num_reg_bytes);
+       atomic64_inc(&ib_peer_client->stats.num_alloc_mrs);
+
+       /*
+        * If invalidation is allowed then the caller must call
+        * ib_umem_activate_invalidation_notifier() or ib_peer_umem_release() to
+        * unlock this mutex. This should be done after the last
+        * read to sg_head, once the caller is ready for the invalidation
+        * function to be called.
+        */
+       if (umem_p->xa_id == PEER_NO_INVALIDATION_ID)
+               mutex_unlock(&umem_p->mapping_lock);
+
+       /*
+        * On success the old umem is replaced with the new, larger, allocation
+        */
+       kfree(old_umem);
+       return &umem_p->umem;
+
+err_pages:
+       ib_peer_client->peer_mem->put_pages(&umem_p->umem.sg_head,
+                                           umem_p->peer_client_context);
+err_xa:
+       if (umem_p->xa_id != PEER_NO_INVALIDATION_ID)
+               xa_erase(&umem_p->ib_peer_client->umem_xa, umem_p->xa_id);
+err_umem:
+       mutex_unlock(&umem_p->mapping_lock);
+       kref_put(&umem_p->kref, ib_peer_umem_kref_release);
+err_client:
+       ib_put_peer_client(ib_peer_client, peer_client_context);
+       return ERR_PTR(ret);
+}
+
+void ib_peer_umem_release(struct ib_umem *umem)
+{
+       struct ib_umem_peer *umem_p =
+               container_of(umem, struct ib_umem_peer, umem);
+
+       /* invalidation_func being set indicates activate was called */
+       if (umem_p->xa_id == PEER_NO_INVALIDATION_ID ||
+           umem_p->invalidation_func)
+               mutex_lock(&umem_p->mapping_lock);
+
+       if (umem_p->mapped)
+               ib_unmap_peer_client(umem_p);
+       mutex_unlock(&umem_p->mapping_lock);
+
+       if (umem_p->xa_id != PEER_NO_INVALIDATION_ID)
+               xa_erase(&umem_p->ib_peer_client->umem_xa, umem_p->xa_id);
+       ib_put_peer_client(umem_p->ib_peer_client, umem_p->peer_client_context);
+       umem_p->ib_peer_client = NULL;
+
+       /* Must match ib_umem_release() */
+       atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
+       mmdrop(umem->owning_mm);
+
+       kref_put(&umem_p->kref, ib_peer_umem_kref_release);
+}
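
The invalidation entry point above (ib_invalidate_peer_memory()) is what a registering client receives back through its invalidate_callback pointer. A peer driver would invoke it with the core_context value it saved from get_pages() when the underlying device memory is about to go away; the surrounding types and names below are illustrative only.

#include <linux/types.h>
#include <rdma/peer_mem.h>

/* Saved at registration time (see ib_register_peer_memory_client() above) */
extern void *my_reg_handle;
extern invalidate_peer_memory my_invalidate_cb;

struct my_pinning {
        u64 core_context;       /* stashed from the get_pages() call */
        /* ... peer-specific bookkeeping ... */
};

static void my_memory_going_away(struct my_pinning *pin)
{
        /*
         * ib_core looks core_context up in its xarray, runs the RDMA
         * driver's invalidation_func, then calls dma_unmap()/put_pages()
         * on this client before returning, so no locks needed by those
         * ops may be held here.
         */
        my_invalidate_cb(my_reg_handle, pin->core_context);
}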
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 917338db7ac13e8d6582c5ba84f00b6b85b369b4..fc6fb83a0e47697d9278c6b02f7958b3720aeefc 100644
@@ -44,6 +44,8 @@
 
 #include "uverbs.h"
 
+#include "ib_peer_mem.h"
+
 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 {
        struct sg_page_iter sg_iter;
@@ -140,15 +142,17 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 EXPORT_SYMBOL(ib_umem_find_best_pgsz);
 
 /**
- * ib_umem_get - Pin and DMA map userspace memory.
+ * __ib_umem_get - Pin and DMA map userspace memory.
  *
  * @device: IB device to connect UMEM
  * @addr: userspace virtual address to start at
  * @size: length of region to pin
  * @access: IB_ACCESS_xxx flags for memory being pinned
+ * @peer_mem_flags: IB_PEER_MEM_xxx flags for memory being used
  */
-struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
-                           size_t size, int access)
+static struct ib_umem *__ib_umem_get(struct ib_device *device,
+                                   unsigned long addr, size_t size, int access,
+                                   unsigned long peer_mem_flags)
 {
        struct ib_umem *umem;
        struct page **page_list;
@@ -258,6 +262,26 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 
 umem_release:
        __ib_umem_release(device, umem, 0);
+
+       /*
+        * If the address belongs to peer memory client, then the first
+        * call to get_user_pages will fail. In this case, try to get
+        * these pages from the peers.
+        */
+       //FIXME: this placement is horrible
+       if (ret < 0 && peer_mem_flags & IB_PEER_MEM_ALLOW) {
+               struct ib_umem *new_umem;
+
+               new_umem = ib_peer_umem_get(umem, ret, peer_mem_flags);
+               if (IS_ERR(new_umem)) {
+                       ret = PTR_ERR(new_umem);
+                       goto vma;
+               }
+               umem = new_umem;
+               ret = 0;
+               goto out;
+       }
+vma:
        atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
 out:
        free_page((unsigned long) page_list);
@@ -268,8 +292,23 @@ umem_kfree:
        }
        return ret ? ERR_PTR(ret) : umem;
 }
+
+struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
+                           size_t size, int access)
+{
+       return __ib_umem_get(device, addr, size, access, 0);
+}
 EXPORT_SYMBOL(ib_umem_get);
 
+struct ib_umem *ib_umem_get_peer(struct ib_device *device, unsigned long addr,
+                                size_t size, int access,
+                                unsigned long peer_mem_flags)
+{
+       return __ib_umem_get(device, addr, size, access,
+                            IB_PEER_MEM_ALLOW | peer_mem_flags);
+}
+EXPORT_SYMBOL(ib_umem_get_peer);
+
 /**
  * ib_umem_release - release memory pinned with ib_umem_get
  * @umem: umem struct to release
@@ -281,6 +320,8 @@ void ib_umem_release(struct ib_umem *umem)
        if (umem->is_odp)
                return ib_umem_odp_release(to_ib_umem_odp(umem));
 
+       if (umem->is_peer)
+               return ib_peer_umem_release(umem);
        __ib_umem_release(umem->ibdev, umem, 1);
 
        atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index eb92cefffd7771a2950aaeb13ba124b557c4bc3d..40f091a523b63b1278b69d80f23b0a7af2ab5436 100644
@@ -734,9 +734,9 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 
        *cqe_size = ucmd.cqe_size;
 
-       cq->buf.umem =
-               ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
-                           entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE);
+       cq->buf.umem = ib_umem_get_peer(&dev->ib_dev, ucmd.buf_addr,
+                                       entries * ucmd.cqe_size,
+                                       IB_ACCESS_LOCAL_WRITE, 0);
        if (IS_ERR(cq->buf.umem)) {
                err = PTR_ERR(cq->buf.umem);
                return err;
@@ -1157,9 +1157,9 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
        if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
                return -EINVAL;
 
-       umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
-                          (size_t)ucmd.cqe_size * entries,
-                          IB_ACCESS_LOCAL_WRITE);
+       umem = ib_umem_get_peer(&dev->ib_dev, ucmd.buf_addr,
+                               (size_t)ucmd.cqe_size * entries,
+                               IB_ACCESS_LOCAL_WRITE, 0);
        if (IS_ERR(umem)) {
                err = PTR_ERR(umem);
                return err;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 8161035eb7740e7421eacd7b1fcc0faf1cb8dd85..8a5c2301c8fff72bfcc68f642749989f3400fec9 100644
@@ -2076,7 +2076,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
        if (err)
                return err;
 
-       obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access);
+       obj->umem = ib_umem_get_peer(&dev->ib_dev, addr, size, access, 0);
        if (IS_ERR(obj->umem))
                return PTR_ERR(obj->umem);
        return 0;
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index 61475b571531271f22bccc852d1bf6051898386f..ce05ea3df27503cba2c7c0376a8a59ad379b3f69 100644
@@ -64,8 +64,9 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
 
        page->user_virt = (virt & PAGE_MASK);
        page->refcnt    = 0;
-       page->umem = ib_umem_get(context->ibucontext.device, virt & PAGE_MASK,
-                                PAGE_SIZE, 0);
+       page->umem =
+               ib_umem_get_peer(context->ibucontext.device, virt & PAGE_MASK,
+                                PAGE_SIZE, 0, 0);
        if (IS_ERR(page->umem)) {
                err = PTR_ERR(page->umem);
                kfree(page);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 24f8d59a42eae6657ef14296c4c2291f79a3a37b..1778606a794d55a9ac8d7ca4428ab9e28c64781b 100644
@@ -47,6 +47,7 @@
  */
 void *xlt_emergency_page;
 static DEFINE_MUTEX(xlt_emergency_page_mutex);
+static void mlx5_invalidate_umem(struct ib_umem *umem, void *priv);
 
 enum {
        MAX_PENDING_REG_MR = 8,
@@ -1485,6 +1486,11 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
                        return ERR_PTR(err);
                }
        }
+
+       if (umem->is_peer)
+               ib_umem_activate_invalidation_notifier(
+                       umem, mlx5_invalidate_umem, mr);
+
        return &mr->ibmr;
 }
 
@@ -1561,7 +1567,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        if (access_flags & IB_ACCESS_ON_DEMAND)
                return create_user_odp_mr(pd, start, length, iova, access_flags,
                                          udata);
-       umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
+       umem = ib_umem_get_peer(&dev->ib_dev, start, length, access_flags,
+                               IB_PEER_MEM_INVAL_SUPP);
        if (IS_ERR(umem))
                return ERR_CAST(umem);
        return create_real_mr(pd, umem, iova, access_flags);
@@ -1698,6 +1705,10 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
                return err;
        }
 
+       if (new_umem->is_peer)
+               ib_umem_activate_invalidation_notifier(
+                       new_umem, mlx5_invalidate_umem, mr);
+
        atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages);
        ib_umem_release(old_umem);
        atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages);
@@ -1771,8 +1782,9 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
                struct ib_umem *new_umem;
                unsigned long page_size;
 
-               new_umem = ib_umem_get(&dev->ib_dev, start, length,
-                                      new_access_flags);
+               new_umem = ib_umem_get_peer(&dev->ib_dev, start, length,
+                                           new_access_flags,
+                                           IB_PEER_MEM_INVAL_SUPP);
                if (IS_ERR(new_umem))
                        return ERR_CAST(new_umem);
 
@@ -2610,3 +2622,15 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
 
        return n;
 }
+
+static void mlx5_invalidate_umem(struct ib_umem *umem, void *priv)
+{
+       struct mlx5_ib_mr *mr = priv;
+
+       /*
+        * DMA is turned off for the mkey, but the mkey remains otherwise
+        * untouched until the normal flow of dereg_mr happens. Any access to
+        * this mkey will generate CQEs.
+        */
+       mlx5_mr_cache_invalidate(mr);
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index bab40ad527dae377a79abb59f10fb25ff3e80a83..6ba082ec5f27bd555c293a5330e9b9be0218ebe2 100644
@@ -807,7 +807,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        if (!ucmd->buf_addr)
                return -EINVAL;
 
-       rwq->umem = ib_umem_get(&dev->ib_dev, ucmd->buf_addr, rwq->buf_size, 0);
+       rwq->umem = ib_umem_get_peer(&dev->ib_dev, ucmd->buf_addr, rwq->buf_size, 0, 0);
        if (IS_ERR(rwq->umem)) {
                mlx5_ib_dbg(dev, "umem_get failed\n");
                err = PTR_ERR(rwq->umem);
@@ -917,8 +917,9 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 
        if (ucmd->buf_addr && ubuffer->buf_size) {
                ubuffer->buf_addr = ucmd->buf_addr;
-               ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
-                                           ubuffer->buf_size, 0);
+               ubuffer->umem =
+                       ib_umem_get_peer(&dev->ib_dev, ubuffer->buf_addr,
+                                        ubuffer->buf_size, 0, 0);
                if (IS_ERR(ubuffer->umem)) {
                        err = PTR_ERR(ubuffer->umem);
                        goto err_bfreg;
@@ -1188,8 +1189,8 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
        unsigned int page_offset_quantized;
        unsigned long page_size;
 
-       sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
-                                      ubuffer->buf_size, 0);
+       sq->ubuffer.umem = ib_umem_get_peer(&dev->ib_dev, ubuffer->buf_addr,
+                                      ubuffer->buf_size, 0, 0);
        if (IS_ERR(sq->ubuffer.umem))
                return PTR_ERR(sq->ubuffer.umem);
        page_size = mlx5_umem_find_best_quantized_pgoff(
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index fab6736e4d6a05ee27dfdb6a625e28d702d46b49..e2157fb5a7fd535e994d3371b6417fffaa445a9a 100644
@@ -76,7 +76,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 
        srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
 
-       srq->umem = ib_umem_get(pd->device, ucmd.buf_addr, buf_size, 0);
+       srq->umem = ib_umem_get_peer(pd->device, ucmd.buf_addr, buf_size, 0, 0);
        if (IS_ERR(srq->umem)) {
                mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
                err = PTR_ERR(srq->umem);
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 7752211c96384fd8c02155c3f64d815ee9df7a4d..f2e407be55042c8df9acf632b7eeeaacdd5d3308 100644
@@ -22,12 +22,20 @@ struct ib_umem {
        unsigned long           address;
        u32 writable : 1;
        u32 is_odp : 1;
+       /* Placing at the end of the bitfield list is ABI preserving on LE */
+       u32 is_peer : 1;
        struct work_struct      work;
        struct sg_table sg_head;
        int             nmap;
        unsigned int    sg_nents;
 };
 
+typedef void (*umem_invalidate_func_t)(struct ib_umem *umem, void *priv);
+enum ib_peer_mem_flags {
+       IB_PEER_MEM_ALLOW = 1 << 0,
+       IB_PEER_MEM_INVAL_SUPP = 1 << 1,
+};
+
 /* Returns the offset of the umem start relative to the first page. */
 static inline int ib_umem_offset(struct ib_umem *umem)
 {
@@ -116,6 +124,13 @@ static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem,
                                      dma_addr & pgoff_bitmask);
 }
 
+struct ib_umem *ib_umem_get_peer(struct ib_device *device, unsigned long addr,
+                                size_t size, int access,
+                                unsigned long peer_mem_flags);
+void ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
+                                          umem_invalidate_func_t func,
+                                          void *cookie);
+
 #else /* CONFIG_INFINIBAND_USER_MEM */
 
 #include <linux/err.h>
@@ -143,6 +158,17 @@ static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem,
 {
        return 0;
 }
+static inline struct ib_umem *ib_umem_get_peer(struct ib_device *device,
+                                              unsigned long addr, size_t size,
+                                              int access,
+                                              unsigned long peer_mem_flags)
+{
+       return ERR_PTR(-EINVAL);
+}
+static inline void ib_umem_activate_invalidation_notifier(
+       struct ib_umem *umem, umem_invalidate_func_t func, void *cookie)
+{
+}
 
 #endif /* CONFIG_INFINIBAND_USER_MEM */
 
diff --git a/include/rdma/peer_mem.h b/include/rdma/peer_mem.h
new file mode 100644
index 0000000..4c2e633
--- /dev/null
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2014-2020,  Mellanox Technologies. All rights reserved.
+ */
+#ifndef RDMA_PEER_MEM_H
+#define RDMA_PEER_MEM_H
+
+#include <linux/scatterlist.h>
+
+#define IB_PEER_MEMORY_NAME_MAX 64
+#define IB_PEER_MEMORY_VER_MAX 16
+
+/*
+ * Prior versions used a void * for core_context, at some point this was
+ * switched to use u64. Be careful if compiling this as 32 bit. To help, the
+ * value of core_context is limited to u32 so it should work OK despite the
+ * type change.
+ */
+#define PEER_MEM_U64_CORE_CONTEXT
+
+struct device;
+
+/**
+ *  struct peer_memory_client - registration information for user virtual
+ *                              memory handlers
+ *
+ * The peer_memory_client scheme allows a driver to register with the ib_umem
+ * system that it has the ability to understand user virtual address ranges
+ * that are not compatible with get_user_pages(). For instance, VMAs created
+ * with io_remap_pfn_range(), or other driver-special VMAs.
+ *
+ * For ranges the interface understands it can provide a DMA mapped sg_table
+ * for use by the ib_umem, allowing user virtual ranges that cannot be
+ * supported by get_user_pages() to be used as umems.
+ */
+struct peer_memory_client {
+       char name[IB_PEER_MEMORY_NAME_MAX];
+       char version[IB_PEER_MEMORY_VER_MAX];
+
+       /**
+        * acquire - Begin working with a user space virtual address range
+        *
+        * @addr - Virtual address to be checked whether belongs to peer.
+        * @size - Length of the virtual memory area starting at addr.
+        * @peer_mem_private_data - Obsolete, always NULL
+        * @peer_mem_name - Obsolete, always NULL
+        * @client_context - Returns an opaque value for this acquire use in
+        *                   other APIs
+        *
+        * Returns 1 if the peer_memory_client supports the entire virtual
+        * address range, 0 or -ERRNO otherwise.  If 1 is returned then
+        * release() will be called to release the acquire().
+        */
+       int (*acquire)(unsigned long addr, size_t size,
+                      void *peer_mem_private_data, char *peer_mem_name,
+                      void **client_context);
+       /**
+        * get_pages - Fill in the first part of a sg_table for a virtual
+        *             address range
+        *
+        * @addr - Virtual address to be checked whether belongs to peer.
+        * @size - Length of the virtual memory area starting at addr.
+        * @write - Always 1
+        * @force - 1 if write is required
+        * @sg_head - Obsolete, always NULL
+        * @client_context - Value returned by acquire()
+        * @core_context - Value to be passed to invalidate_peer_memory for
+        *                 this get
+        *
+        * addr/size are passed as the raw virtual address range requested by
+        * the user, it is not aligned to any page size. get_pages() is always
+        * followed by dma_map().
+        *
+        * Upon return the caller can call the invalidate_callback().
+        *
+        * Returns 0 on success, -ERRNO on failure. After success put_pages()
+        * will be called to return the pages.
+        */
+       int (*get_pages)(unsigned long addr, size_t size, int write, int force,
+                        struct sg_table *sg_head, void *client_context,
+                        u64 core_context);
+       /**
+        * dma_map - Create a DMA mapped sg_table
+        *
+        * @sg_head - The sg_table to allocate
+        * @client_context - Value returned by acquire()
+        * @dma_device - The device that will be doing DMA from these addresses
+        * @dmasync - Obsolete, always 0
+        * @nmap - Returns the number of dma mapped entries in the sg_head
+        *
+        * Must be called after get_pages(). This must fill in the sg_head with
+        * DMA mapped SGLs for dma_device. Each SGL start and end must meet a
+        * minimum alignment of at least PAGE_SIZE, though individual sgls can
+        * be multiples of PAGE_SIZE, in any mixture. Since the user virtual
+        * address/size are not page aligned, the implementation must increase
+        * it to the logical alignment when building the SGLs.
+        *
+        * Returns 0 on success, -ERRNO on failure. After success dma_unmap()
+        * will be called to unmap the pages. On failure sg_head must be left
+        * untouched or point to a valid sg_table.
+        */
+       int (*dma_map)(struct sg_table *sg_head, void *client_context,
+                      struct device *dma_device, int dmasync, int *nmap);
+       /**
+        * dma_unmap - Unmap a DMA mapped sg_table
+        *
+        * @sg_head - The sg_table to unmap
+        * @client_context - Value returned by acquire()
+        * @dma_device - The device that will be doing DMA from these addresses
+        *
+        * sg_head will not be touched after this function returns.
+        *
+        * Must return 0.
+        */
+       int (*dma_unmap)(struct sg_table *sg_head, void *client_context,
+                        struct device *dma_device);
+       /**
+        * put_pages - Unpin a SGL
+        *
+        * @sg_head - The sg_table to unpin
+        * @client_context - Value returned by acquire()
+        *
+        * sg_head must be freed on return.
+        */
+       void (*put_pages)(struct sg_table *sg_head, void *client_context);
+       /* Client should always return PAGE_SIZE */
+       unsigned long (*get_page_size)(void *client_context);
+       /**
+        * release - Undo acquire
+        *
+        * @client_context - Value returned by acquire()
+        *
+        * If acquire() returns 1 then release() must be called. All
+        * get_pages() and dma_map()'s must be undone before calling this
+        * function.
+        */
+       void (*release)(void *client_context);
+};
+
+/*
+ * If invalidate_callback() is non-NULL then the client will only support
+ * umems which can be invalidated. The caller may call the
+ * invalidate_callback() after acquire(); on return, the range will no longer
+ * have DMA active, and release() will have been called.
+ *
+ * Note: The implementation locking must ensure that get_pages(), and
+ * dma_map() do not have locking dependencies with invalidate_callback(). The
+ * ib_core will wait until any concurrent get_pages() or dma_map() completes
+ * before returning.
+ *
+ * Similarly, this can call dma_unmap(), put_pages() and release() from within
+ * the callback, or will wait for another thread doing those operations to
+ * complete.
+ *
+ * For these reasons the user of invalidate_callback() must be careful with
+ * locking.
+ */
+typedef int (*invalidate_peer_memory)(void *reg_handle, u64 core_context);
+
+void *
+ib_register_peer_memory_client(const struct peer_memory_client *peer_client,
+                              invalidate_peer_memory *invalidate_callback);
+void ib_unregister_peer_memory_client(void *reg_handle);
+
+#endif
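
To tie the header together, here is a skeleton client built against it. The my_*() bodies are placeholders for the vendor-specific logic (recognising the VA range, pinning the peer pages, building the DMA mapped sg_table); only struct peer_memory_client and the register/unregister calls come from this patch.

#include <linux/module.h>
#include <linux/mm.h>
#include <rdma/peer_mem.h>

static void *my_reg_handle;
static invalidate_peer_memory my_invalidate_cb;

static int my_acquire(unsigned long addr, size_t size,
                      void *peer_mem_private_data, char *peer_mem_name,
                      void **client_context)
{
        return 0;       /* return 1 only when [addr, addr + size) is peer memory */
}

static int my_get_pages(unsigned long addr, size_t size, int write, int force,
                        struct sg_table *sg_head, void *client_context,
                        u64 core_context)
{
        /* Pin the peer pages; remember core_context for my_invalidate_cb() */
        return -EOPNOTSUPP;
}

static int my_dma_map(struct sg_table *sg_head, void *client_context,
                      struct device *dma_device, int dmasync, int *nmap)
{
        /* Fill sg_head with PAGE_SIZE aligned SGLs DMA mapped for dma_device */
        return -EOPNOTSUPP;
}

static int my_dma_unmap(struct sg_table *sg_head, void *client_context,
                        struct device *dma_device)
{
        return 0;
}

static void my_put_pages(struct sg_table *sg_head, void *client_context)
{
}

static unsigned long my_get_page_size(void *client_context)
{
        return PAGE_SIZE;
}

static void my_release(void *client_context)
{
}

static const struct peer_memory_client my_client = {
        .name           = "example_peer",
        .version        = "1.0",
        .acquire        = my_acquire,
        .get_pages      = my_get_pages,
        .dma_map        = my_dma_map,
        .dma_unmap      = my_dma_unmap,
        .put_pages      = my_put_pages,
        .get_page_size  = my_get_page_size,
        .release        = my_release,
};

static int __init my_peer_init(void)
{
        /* Passing &my_invalidate_cb opts this client in to invalidation */
        my_reg_handle = ib_register_peer_memory_client(&my_client,
                                                       &my_invalidate_cb);
        return my_reg_handle ? 0 : -EINVAL;
}

static void __exit my_peer_exit(void)
{
        /* Waits until every umem backed by this client has been released */
        ib_unregister_peer_memory_client(my_reg_handle);
}

module_init(my_peer_init);
module_exit(my_peer_exit);
MODULE_LICENSE("GPL");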