--- /dev/null
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2014-2020, Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <linux/sched/mm.h>
+#include "ib_peer_mem.h"
+
+/* Serializes peer_memory_list, peers_kobj and client (un)registration */
+static DEFINE_MUTEX(peer_memory_mutex);
+static LIST_HEAD(peer_memory_list);
+/* Shared sysfs parent directory, created lazily under /sys/kernel/mm */
+static struct kobject *peers_kobj;
+/* xa_id sentinel for umems that do not participate in invalidation */
+#define PEER_NO_INVALIDATION_ID U32_MAX
+
+static int ib_invalidate_peer_memory(void *reg_handle, u64 core_context);
+
+/*
+ * sysfs attribute wrapper that routes show/store to handlers taking the
+ * owning ib_peer_memory_client instead of a bare kobject.
+ */
+struct peer_mem_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct ib_peer_memory_client *ib_peer_client,
+			struct peer_mem_attribute *attr, char *buf);
+	ssize_t (*store)(struct ib_peer_memory_client *ib_peer_client,
+			 struct peer_mem_attribute *attr, const char *buf,
+			 size_t count);
+};
+/* Declares a read-only attribute peer_attr_<name> bound to <name>_show() */
+#define PEER_ATTR_RO(_name) \
+	struct peer_mem_attribute peer_attr_ ## _name = __ATTR_RO(_name)
+
+/* sysfs: report the peer client's self-declared version string */
+static ssize_t version_show(struct ib_peer_memory_client *ib_peer_client,
+			    struct peer_mem_attribute *attr, char *buf)
+{
+	const struct peer_memory_client *peer_mem = ib_peer_client->peer_mem;
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", peer_mem->version);
+}
+static PEER_ATTR_RO(version);
+
+/* sysfs: count of MRs registered through this peer */
+static ssize_t num_alloc_mrs_show(struct ib_peer_memory_client *ib_peer_client,
+				  struct peer_mem_attribute *attr, char *buf)
+{
+	u64 val = (u64)atomic64_read(&ib_peer_client->stats.num_alloc_mrs);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+}
+static PEER_ATTR_RO(num_alloc_mrs);
+
+/* sysfs: count of MRs torn down through this peer */
+static ssize_t
+num_dealloc_mrs_show(struct ib_peer_memory_client *ib_peer_client,
+		     struct peer_mem_attribute *attr, char *buf)
+{
+	u64 val = (u64)atomic64_read(&ib_peer_client->stats.num_dealloc_mrs);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+}
+static PEER_ATTR_RO(num_dealloc_mrs);
+
+/* sysfs: total pages DMA mapped via this peer */
+static ssize_t num_reg_pages_show(struct ib_peer_memory_client *ib_peer_client,
+				  struct peer_mem_attribute *attr, char *buf)
+{
+	u64 val = (u64)atomic64_read(&ib_peer_client->stats.num_reg_pages);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+}
+static PEER_ATTR_RO(num_reg_pages);
+
+/* sysfs: total pages unmapped via this peer */
+static ssize_t
+num_dereg_pages_show(struct ib_peer_memory_client *ib_peer_client,
+		     struct peer_mem_attribute *attr, char *buf)
+{
+	u64 val = (u64)atomic64_read(&ib_peer_client->stats.num_dereg_pages);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+}
+static PEER_ATTR_RO(num_dereg_pages);
+
+/* sysfs: total bytes registered via this peer */
+static ssize_t num_reg_bytes_show(struct ib_peer_memory_client *ib_peer_client,
+				  struct peer_mem_attribute *attr, char *buf)
+{
+	u64 val = (u64)atomic64_read(&ib_peer_client->stats.num_reg_bytes);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+}
+static PEER_ATTR_RO(num_reg_bytes);
+
+/* sysfs: total bytes deregistered via this peer */
+static ssize_t
+num_dereg_bytes_show(struct ib_peer_memory_client *ib_peer_client,
+		     struct peer_mem_attribute *attr, char *buf)
+{
+	u64 val = (u64)atomic64_read(&ib_peer_client->stats.num_dereg_bytes);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+}
+static PEER_ATTR_RO(num_dereg_bytes);
+
+/*
+ * sysfs: number of invalidation callbacks the peer has issued.
+ * NOTE(review): the counter is incremented under xa_lock in
+ * ib_invalidate_peer_memory(); this unlocked read may be momentarily
+ * stale, which is acceptable for a statistic.
+ */
+static ssize_t
+num_free_callbacks_show(struct ib_peer_memory_client *ib_peer_client,
+			struct peer_mem_attribute *attr, char *buf)
+{
+	unsigned long val = ib_peer_client->stats.num_free_callbacks;
+
+	return scnprintf(buf, PAGE_SIZE, "%lu\n", val);
+}
+static PEER_ATTR_RO(num_free_callbacks);
+
+/* Attributes published per peer under /sys/kernel/mm/memory_peers/<name>/ */
+static struct attribute *peer_mem_attrs[] = {
+	&peer_attr_version.attr,
+	&peer_attr_num_alloc_mrs.attr,
+	&peer_attr_num_dealloc_mrs.attr,
+	&peer_attr_num_reg_pages.attr,
+	&peer_attr_num_dereg_pages.attr,
+	&peer_attr_num_reg_bytes.attr,
+	&peer_attr_num_dereg_bytes.attr,
+	&peer_attr_num_free_callbacks.attr,
+	NULL,
+};
+
+static const struct attribute_group peer_mem_attr_group = {
+	.attrs = peer_mem_attrs,
+};
+
+/* Generic sysfs show: dispatch to the typed peer_mem_attribute handler */
+static ssize_t peer_attr_show(struct kobject *kobj, struct attribute *attr,
+			      char *buf)
+{
+	struct ib_peer_memory_client *ib_peer_client =
+		container_of(kobj, struct ib_peer_memory_client, kobj);
+	struct peer_mem_attribute *peer_attr =
+		container_of(attr, struct peer_mem_attribute, attr);
+
+	if (!peer_attr->show)
+		return -EIO;
+	return peer_attr->show(ib_peer_client, peer_attr, buf);
+}
+
+static const struct sysfs_ops peer_mem_sysfs_ops = {
+	.show = peer_attr_show,
+};
+
+/* kobject release: frees the client once its last reference is dropped */
+static void ib_peer_memory_client_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct ib_peer_memory_client, kobj));
+}
+
+static struct kobj_type peer_mem_type = {
+	.sysfs_ops = &peer_mem_sysfs_ops,
+	.release = ib_peer_memory_client_release,
+};
+
+/*
+ * Verify that the client implements every op the core calls
+ * unconditionally. Returns 0 when all mandatory ops are present,
+ * -EINVAL otherwise.
+ */
+static int ib_memory_peer_check_mandatory(const struct peer_memory_client
+					  *peer_client)
+{
+#define PEER_MEM_MANDATORY_FUNC(x) {offsetof(struct peer_memory_client, x), #x}
+	static const struct {
+		size_t offset;
+		char *name;
+	} mandatory_table[] = {
+		PEER_MEM_MANDATORY_FUNC(acquire),
+		PEER_MEM_MANDATORY_FUNC(get_pages),
+		PEER_MEM_MANDATORY_FUNC(put_pages),
+		PEER_MEM_MANDATORY_FUNC(dma_map),
+		PEER_MEM_MANDATORY_FUNC(dma_unmap),
+	};
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(mandatory_table); i++) {
+		/* Read the function pointer stored at the table's offset */
+		void *fn = *(void **)((void *)peer_client +
+				      mandatory_table[i].offset);
+
+		if (fn)
+			continue;
+		pr_err("Peer memory %s is missing mandatory function %s\n",
+		       peer_client->name, mandatory_table[i].name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ib_register_peer_memory_client - Register a peer memory driver
+ * @peer_client: ops table and identity of the peer driver
+ * @invalidate_callback: if non-NULL, the core's invalidation entry point is
+ *	returned through it and every umem linked to this peer must support
+ *	invalidation
+ *
+ * Returns an opaque registration handle on success, NULL on failure.
+ */
+void *
+ib_register_peer_memory_client(const struct peer_memory_client *peer_client,
+			       invalidate_peer_memory *invalidate_callback)
+{
+	struct ib_peer_memory_client *ib_peer_client;
+	int ret;
+
+	if (ib_memory_peer_check_mandatory(peer_client))
+		return NULL;
+
+	ib_peer_client = kzalloc(sizeof(*ib_peer_client), GFP_KERNEL);
+	if (!ib_peer_client)
+		return NULL;
+	kobject_init(&ib_peer_client->kobj, &peer_mem_type);
+	refcount_set(&ib_peer_client->usecnt, 1);
+	init_completion(&ib_peer_client->usecnt_zero);
+	ib_peer_client->peer_mem = peer_client;
+	xa_init_flags(&ib_peer_client->umem_xa, XA_FLAGS_ALLOC);
+
+	/*
+	 * If the peer wants the invalidation_callback then all memory users
+	 * linked to that peer must support invalidation.
+	 */
+	if (invalidate_callback) {
+		*invalidate_callback = ib_invalidate_peer_memory;
+		ib_peer_client->invalidation_required = true;
+	}
+
+	mutex_lock(&peer_memory_mutex);
+	if (!peers_kobj) {
+		/* Created under /sys/kernel/mm */
+		peers_kobj = kobject_create_and_add("memory_peers", mm_kobj);
+		if (!peers_kobj)
+			goto err_unlock;
+	}
+
+	ret = kobject_add(&ib_peer_client->kobj, peers_kobj, peer_client->name);
+	if (ret)
+		goto err_parent;
+
+	ret = sysfs_create_group(&ib_peer_client->kobj,
+				 &peer_mem_attr_group);
+	if (ret)
+		goto err_parent;
+	list_add_tail(&ib_peer_client->core_peer_list, &peer_memory_list);
+	mutex_unlock(&peer_memory_mutex);
+	return ib_peer_client;
+
+err_parent:
+	/* Tear down "memory_peers" only if no other peer is registered */
+	if (list_empty(&peer_memory_list)) {
+		kobject_put(peers_kobj);
+		peers_kobj = NULL;
+	}
+err_unlock:
+	mutex_unlock(&peer_memory_mutex);
+	/* Final put invokes ib_peer_memory_client_release() -> kfree() */
+	kobject_put(&ib_peer_client->kobj);
+	return NULL;
+}
+EXPORT_SYMBOL(ib_register_peer_memory_client);
+
+/**
+ * ib_unregister_peer_memory_client - Unregister a peer memory driver
+ * @reg_handle: handle returned by ib_register_peer_memory_client()
+ *
+ * Blocks until every umem that used this peer has been destroyed.
+ */
+void ib_unregister_peer_memory_client(void *reg_handle)
+{
+	struct ib_peer_memory_client *ib_peer_client = reg_handle;
+
+	mutex_lock(&peer_memory_mutex);
+	list_del(&ib_peer_client->core_peer_list);
+	/* Remove the shared sysfs directory when the last peer goes away */
+	if (list_empty(&peer_memory_list)) {
+		kobject_put(peers_kobj);
+		peers_kobj = NULL;
+	}
+	mutex_unlock(&peer_memory_mutex);
+
+	/*
+	 * Wait for all umems to be destroyed before returning. Once
+	 * ib_unregister_peer_memory_client() returns no umems will call any
+	 * peer_mem ops.
+	 */
+	if (refcount_dec_and_test(&ib_peer_client->usecnt))
+		complete(&ib_peer_client->usecnt_zero);
+	wait_for_completion(&ib_peer_client->usecnt_zero);
+
+	kobject_put(&ib_peer_client->kobj);
+}
+EXPORT_SYMBOL(ib_unregister_peer_memory_client);
+
+/*
+ * Find the first registered peer whose acquire() claims the addr/size range.
+ * Peers that require invalidation are skipped unless the caller passed
+ * IB_PEER_MEM_INVAL_SUPP in peer_mem_flags. On success a usecnt reference
+ * is taken on the returned client and *peer_client_context holds the peer's
+ * opaque acquire handle; returns NULL when no peer matches.
+ */
+static struct ib_peer_memory_client *
+ib_get_peer_client(unsigned long addr, size_t size,
+		   unsigned long peer_mem_flags, void **peer_client_context)
+{
+	struct ib_peer_memory_client *ib_peer_client;
+	int ret = 0;
+
+	mutex_lock(&peer_memory_mutex);
+	list_for_each_entry(ib_peer_client, &peer_memory_list,
+			    core_peer_list) {
+		if (ib_peer_client->invalidation_required &&
+		    (!(peer_mem_flags & IB_PEER_MEM_INVAL_SUPP)))
+			continue;
+		/* acquire() > 0 means the peer owns the whole range */
+		ret = ib_peer_client->peer_mem->acquire(addr, size, NULL, NULL,
+							peer_client_context);
+		if (ret > 0) {
+			refcount_inc(&ib_peer_client->usecnt);
+			mutex_unlock(&peer_memory_mutex);
+			return ib_peer_client;
+		}
+	}
+	mutex_unlock(&peer_memory_mutex);
+	return NULL;
+}
+
+/* Undo ib_get_peer_client(): release the acquire and drop the usecnt ref */
+static void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client,
+			       void *peer_client_context)
+{
+	const struct peer_memory_client *peer_mem = ib_peer_client->peer_mem;
+
+	/* release() is an optional op */
+	if (peer_mem->release)
+		peer_mem->release(peer_client_context);
+	if (refcount_dec_and_test(&ib_peer_client->usecnt))
+		complete(&ib_peer_client->usecnt_zero);
+}
+
+/* kref release: frees the peer umem wrapper */
+static void ib_peer_umem_kref_release(struct kref *kref)
+{
+	struct ib_umem_peer *umem_p =
+		container_of(kref, struct ib_umem_peer, kref);
+
+	kfree(umem_p);
+}
+
+/*
+ * Undo the peer DMA mapping and page pinning for a mapped umem.
+ * Caller must hold umem_p->mapping_lock. Any boundary-SGE trimming done by
+ * fix_peer_sgls() is reverted first so the peer sees its original sg_table.
+ */
+static void ib_unmap_peer_client(struct ib_umem_peer *umem_p)
+{
+	struct ib_peer_memory_client *ib_peer_client = umem_p->ib_peer_client;
+	const struct peer_memory_client *peer_mem = ib_peer_client->peer_mem;
+	struct ib_umem *umem = &umem_p->umem;
+
+	lockdep_assert_held(&umem_p->mapping_lock);
+
+	/* Restore the saved length/dma_len of the trimmed last SGE */
+	if (umem_p->last_sg) {
+		umem_p->last_sg->length = umem_p->last_length;
+		sg_dma_len(umem_p->last_sg) = umem_p->last_dma_length;
+	}
+
+	/* Restore the saved address/lengths of the trimmed first SGE */
+	if (umem_p->first_sg) {
+		umem_p->first_sg->dma_address = umem_p->first_dma_address;
+		umem_p->first_sg->length = umem_p->first_length;
+		sg_dma_len(umem_p->first_sg) = umem_p->first_dma_length;
+	}
+
+	peer_mem->dma_unmap(&umem_p->umem.sg_head, umem_p->peer_client_context,
+			    umem_p->umem.ibdev->dma_device);
+	peer_mem->put_pages(&umem_p->umem.sg_head, umem_p->peer_client_context);
+	memset(&umem->sg_head, 0, sizeof(umem->sg_head));
+
+	atomic64_add(umem->nmap, &ib_peer_client->stats.num_dereg_pages);
+	atomic64_add(umem->length, &ib_peer_client->stats.num_dereg_bytes);
+	atomic64_inc(&ib_peer_client->stats.num_dealloc_mrs);
+	/*
+	 * Store NULL rather than erase: the index stays allocated until
+	 * ib_peer_umem_release() calls xa_erase(), so invalidation lookups
+	 * simply see no umem.
+	 */
+	if (umem_p->xa_id != PEER_NO_INVALIDATION_ID)
+		xa_store(&ib_peer_client->umem_xa, umem_p->xa_id, NULL,
+			 GFP_KERNEL);
+	umem_p->mapped = false;
+}
+
+/*
+ * Invalidation entry point handed to peers at registration. Looks up the
+ * umem by the core_context value the peer was given at get_pages() time,
+ * notifies the umem's owner through invalidation_func, then unmaps.
+ * Always returns 0; a missing umem means teardown already ran.
+ */
+static int ib_invalidate_peer_memory(void *reg_handle, u64 core_context)
+{
+	struct ib_peer_memory_client *ib_peer_client = reg_handle;
+	struct ib_umem_peer *umem_p;
+
+	/*
+	 * The client is not required to fence against invalidation during
+	 * put_pages() as that would deadlock when we call put_pages() here.
+	 * Thus the core_context cannot be a umem pointer as we have no control
+	 * over the lifetime. Since we won't change the kABI for this to add a
+	 * proper kref, an xarray is used.
+	 */
+	xa_lock(&ib_peer_client->umem_xa);
+	ib_peer_client->stats.num_free_callbacks += 1;
+	umem_p = xa_load(&ib_peer_client->umem_xa, core_context);
+	if (!umem_p)
+		goto out_unlock;
+	/* Pin umem_p so it survives after the xa_lock is dropped */
+	kref_get(&umem_p->kref);
+	xa_unlock(&ib_peer_client->umem_xa);
+
+	mutex_lock(&umem_p->mapping_lock);
+	if (umem_p->mapped) {
+		/*
+		 * At this point the invalidation_func must be !NULL as the get
+		 * flow does not unlock mapping_lock until it is set, and umems
+		 * that do not require invalidation are not in the xarray.
+		 */
+		umem_p->invalidation_func(&umem_p->umem,
+					  umem_p->invalidation_private);
+		ib_unmap_peer_client(umem_p);
+	}
+	mutex_unlock(&umem_p->mapping_lock);
+	kref_put(&umem_p->kref, ib_peer_umem_kref_release);
+	return 0;
+
+out_unlock:
+	xa_unlock(&ib_peer_client->umem_xa);
+	return 0;
+}
+
+/**
+ * ib_umem_activate_invalidation_notifier - Arm peer invalidation
+ * @umem: peer umem returned by ib_peer_umem_get()
+ * @func: callback the core invokes when the peer invalidates the mapping
+ * @priv: opaque argument passed back to @func
+ *
+ * Drops the mapping_lock that ib_peer_umem_get() left held for
+ * invalidation-capable umems; once released, @func may be called
+ * asynchronously. No-op for umems outside the invalidation scheme.
+ */
+void ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
+					    umem_invalidate_func_t func,
+					    void *priv)
+{
+	struct ib_umem_peer *umem_p =
+		container_of(umem, struct ib_umem_peer, umem);
+
+	if (WARN_ON(!umem->is_peer))
+		return;
+	/* Umems without invalidation never held the lock across the get */
+	if (umem_p->xa_id == PEER_NO_INVALIDATION_ID)
+		return;
+
+	umem_p->invalidation_func = func;
+	umem_p->invalidation_private = priv;
+	/* Pairs with the lock in ib_peer_umem_get() */
+	mutex_unlock(&umem_p->mapping_lock);
+
+	/* At this point func can be called asynchronously */
+}
+EXPORT_SYMBOL(ib_umem_activate_invalidation_notifier);
+
+/*
+ * The peer maps at its own page granularity, which may exceed PAGE_SIZE.
+ * Trim the first and last SGE so the mapping spans only the
+ * PAGE_SIZE-aligned region around the user's address range, saving the
+ * untrimmed values so ib_unmap_peer_client() can restore them.
+ * NOTE(review): the arithmetic assumes peer_page_size is a power-of-two
+ * multiple of PAGE_SIZE — confirm against get_page_size() implementations.
+ */
+static void fix_peer_sgls(struct ib_umem_peer *umem_p, unsigned long peer_page_size)
+{
+	struct ib_umem *umem = &umem_p->umem;
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(umem_p->umem.sg_head.sgl, sg, umem_p->umem.nmap, i) {
+		if (i == 0) {
+			unsigned long offset;
+
+			/* Save originals for restoration at unmap time */
+			umem_p->first_sg = sg;
+			umem_p->first_dma_address = sg->dma_address;
+			umem_p->first_dma_length = sg_dma_len(sg);
+			umem_p->first_length = sg->length;
+
+			/* Advance past the peer-page slack before the range */
+			offset = ALIGN_DOWN(umem->address, PAGE_SIZE) -
+				 ALIGN_DOWN(umem->address, peer_page_size);
+			sg->dma_address += offset;
+			sg_dma_len(sg) -= offset;
+			sg->length -= offset;
+		}
+
+		if (i == umem_p->umem.nmap - 1) {
+			unsigned long trim;
+
+			umem_p->last_sg = sg;
+			umem_p->last_dma_length = sg_dma_len(sg);
+			umem_p->last_length = sg->length;
+
+			/* Cut the peer-page slack after the range's end */
+			trim = ALIGN(umem->address + umem->length,
+				     peer_page_size) -
+			       ALIGN(umem->address + umem->length, PAGE_SIZE);
+			sg_dma_len(sg) -= trim;
+			sg->length -= trim;
+		}
+	}
+}
+
+/**
+ * ib_peer_umem_get - Try to satisfy a umem via a registered peer
+ * @old_umem: the umem whose address range get_user_pages() could not pin
+ * @old_ret: error to propagate when no peer claims the range
+ * @peer_mem_flags: IB_PEER_MEM_* capabilities of the caller
+ *
+ * On success the old umem is replaced (freed) by a larger ib_umem_peer
+ * allocation whose embedded umem is returned. For invalidation-capable
+ * peers the mapping_lock is intentionally left held; the caller must call
+ * ib_umem_activate_invalidation_notifier() or ib_peer_umem_release() to
+ * drop it. Returns ERR_PTR on failure.
+ */
+struct ib_umem *ib_peer_umem_get(struct ib_umem *old_umem, int old_ret,
+				 unsigned long peer_mem_flags)
+{
+	struct ib_peer_memory_client *ib_peer_client;
+	unsigned long peer_page_size;
+	void *peer_client_context;
+	struct ib_umem_peer *umem_p;
+	int ret;
+
+	ib_peer_client =
+		ib_get_peer_client(old_umem->address, old_umem->length,
+				   peer_mem_flags, &peer_client_context);
+	if (!ib_peer_client)
+		return ERR_PTR(old_ret);
+
+	umem_p = kzalloc(sizeof(*umem_p), GFP_KERNEL);
+	if (!umem_p) {
+		ret = -ENOMEM;
+		goto err_client;
+	}
+
+	kref_init(&umem_p->kref);
+	/* Copy the old umem's fields, but start with an empty sg_head */
+	umem_p->umem = *old_umem;
+	memset(&umem_p->umem.sg_head, 0, sizeof(umem_p->umem.sg_head));
+	umem_p->umem.is_peer = 1;
+	umem_p->ib_peer_client = ib_peer_client;
+	umem_p->peer_client_context = peer_client_context;
+	mutex_init(&umem_p->mapping_lock);
+	umem_p->xa_id = PEER_NO_INVALIDATION_ID;
+
+	mutex_lock(&umem_p->mapping_lock);
+	/* Allocate the core_context ID the peer will use for invalidation */
+	if (ib_peer_client->invalidation_required) {
+		ret = xa_alloc_cyclic(&ib_peer_client->umem_xa, &umem_p->xa_id,
+				      umem_p,
+				      XA_LIMIT(0, PEER_NO_INVALIDATION_ID - 1),
+				      &ib_peer_client->xa_cyclic_next,
+				      GFP_KERNEL);
+		if (ret < 0)
+			goto err_umem;
+	}
+
+	/*
+	 * We always request write permissions to the pages, to force breaking
+	 * of any CoW during the registration of the MR. For read-only MRs we
+	 * use the "force" flag to indicate that CoW breaking is required but
+	 * the registration should not fail if referencing read-only areas.
+	 */
+	ret = ib_peer_client->peer_mem->get_pages(umem_p->umem.address,
+						  umem_p->umem.length, 1,
+						  !umem_p->umem.writable, NULL,
+						  peer_client_context,
+						  umem_p->xa_id);
+	if (ret)
+		goto err_xa;
+
+	ret = ib_peer_client->peer_mem->dma_map(&umem_p->umem.sg_head,
+						peer_client_context,
+						umem_p->umem.ibdev->dma_device,
+						0, &umem_p->umem.nmap);
+	if (ret)
+		goto err_pages;
+
+	/* Trim boundary SGEs if the peer maps at a larger page size */
+	peer_page_size = ib_peer_client->peer_mem->get_page_size(peer_client_context);
+	if (peer_page_size != PAGE_SIZE)
+		fix_peer_sgls(umem_p, peer_page_size);
+
+	umem_p->mapped = true;
+	atomic64_add(umem_p->umem.nmap, &ib_peer_client->stats.num_reg_pages);
+	atomic64_add(umem_p->umem.length, &ib_peer_client->stats.num_reg_bytes);
+	atomic64_inc(&ib_peer_client->stats.num_alloc_mrs);
+
+	/*
+	 * If invalidation is allowed then the caller must call
+	 * ib_umem_activate_invalidation_notifier() or ib_peer_umem_release() to
+	 * unlock this mutex. The unlock should be done after the last
+	 * read of sg_head, once the caller is ready for the invalidation
+	 * function to be called.
+	 */
+	if (umem_p->xa_id == PEER_NO_INVALIDATION_ID)
+		mutex_unlock(&umem_p->mapping_lock);
+
+	/*
+	 * On success the old umem is replaced with the new, larger, allocation
+	 */
+	kfree(old_umem);
+	return &umem_p->umem;
+
+err_pages:
+	ib_peer_client->peer_mem->put_pages(&umem_p->umem.sg_head,
+					    umem_p->peer_client_context);
+err_xa:
+	if (umem_p->xa_id != PEER_NO_INVALIDATION_ID)
+		xa_erase(&umem_p->ib_peer_client->umem_xa, umem_p->xa_id);
+err_umem:
+	mutex_unlock(&umem_p->mapping_lock);
+	kref_put(&umem_p->kref, ib_peer_umem_kref_release);
+err_client:
+	ib_put_peer_client(ib_peer_client, peer_client_context);
+	return ERR_PTR(ret);
+}
+
+/**
+ * ib_peer_umem_release - Tear down a peer umem
+ * @umem: the umem embedded in an ib_umem_peer
+ *
+ * Unmaps if still mapped, returns the acquire to the peer, and drops the
+ * references taken by ib_peer_umem_get(). Replaces ib_umem_release() for
+ * peer umems, so it also performs the pinned_vm/mm accounting.
+ */
+void ib_peer_umem_release(struct ib_umem *umem)
+{
+	struct ib_umem_peer *umem_p =
+		container_of(umem, struct ib_umem_peer, umem);
+
+	/*
+	 * invalidation_func being set indicates activate was called.
+	 * If invalidation was possible but activate was never called, the
+	 * mapping_lock is still held from ib_peer_umem_get(), so it must
+	 * not be taken again here.
+	 */
+	if (umem_p->xa_id == PEER_NO_INVALIDATION_ID ||
+	    umem_p->invalidation_func)
+		mutex_lock(&umem_p->mapping_lock);
+
+	if (umem_p->mapped)
+		ib_unmap_peer_client(umem_p);
+	mutex_unlock(&umem_p->mapping_lock);
+
+	/* Free the invalidation ID kept allocated by ib_unmap_peer_client() */
+	if (umem_p->xa_id != PEER_NO_INVALIDATION_ID)
+		xa_erase(&umem_p->ib_peer_client->umem_xa, umem_p->xa_id);
+	ib_put_peer_client(umem_p->ib_peer_client, umem_p->peer_client_context);
+	umem_p->ib_peer_client = NULL;
+
+	/* Must match ib_umem_release() */
+	atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
+	mmdrop(umem->owning_mm);
+
+	kref_put(&umem_p->kref, ib_peer_umem_kref_release);
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2014-2020, Mellanox Technologies. All rights reserved.
+ */
+#ifndef RDMA_PEER_MEM_H
+#define RDMA_PEER_MEM_H
+
+#include <linux/scatterlist.h>
+
+#define IB_PEER_MEMORY_NAME_MAX 64
+#define IB_PEER_MEMORY_VER_MAX 16
+
+/*
+ * Prior versions used a void * for core_context; at some point this was
+ * switched to use u64. Be careful if compiling this as 32 bit. To help,
+ * the value of core_context is limited to u32, so it should work despite
+ * the type change.
+ */
+#define PEER_MEM_U64_CORE_CONTEXT
+
+struct device;
+
+/**
+ * struct peer_memory_client - registration information for user virtual
+ * memory handlers
+ *
+ * The peer_memory_client scheme allows a driver to register with the ib_umem
+ * system that it has the ability to understand user virtual address ranges
+ * that are not compatible with get_user_pages(). For instance VMAs created
+ * with io_remap_pfn_range(), or other driver special VMA.
+ *
+ * For ranges the interface understands it can provide a DMA mapped sg_table
+ * for use by the ib_umem, allowing user virtual ranges that cannot be
+ * supported by get_user_pages() to be used as umems.
+ */
+struct peer_memory_client {
+	char name[IB_PEER_MEMORY_NAME_MAX];
+	char version[IB_PEER_MEMORY_VER_MAX];
+
+	/**
+	 * acquire - Begin working with a user space virtual address range
+	 *
+	 * @addr - Virtual address to be checked whether belongs to peer.
+	 * @size - Length of the virtual memory area starting at addr.
+	 * @peer_mem_private_data - Obsolete, always NULL
+	 * @peer_mem_name - Obsolete, always NULL
+	 * @client_context - Returns an opaque value for this acquire use in
+	 * other APIs
+	 *
+	 * Returns 1 if the peer_memory_client supports the entire virtual
+	 * address range, 0 or -ERRNO otherwise. If 1 is returned then
+	 * release() will be called to release the acquire().
+	 */
+	int (*acquire)(unsigned long addr, size_t size,
+		       void *peer_mem_private_data, char *peer_mem_name,
+		       void **client_context);
+	/**
+	 * get_pages - Fill in the first part of a sg_table for a virtual
+	 * address range
+	 *
+	 * @addr - Virtual address to be checked whether belongs to peer.
+	 * @size - Length of the virtual memory area starting at addr.
+	 * @write - Always 1
+	 * @force - 1 if write is required
+	 * @sg_head - Obsolete, always NULL
+	 * @client_context - Value returned by acquire()
+	 * @core_context - Value to be passed to invalidate_peer_memory for
+	 * this get
+	 *
+	 * addr/size are passed as the raw virtual address range requested by
+	 * the user, it is not aligned to any page size. get_pages() is always
+	 * followed by dma_map().
+	 *
+	 * Upon return the caller can call the invalidate_callback().
+	 *
+	 * Returns 0 on success, -ERRNO on failure. After success put_pages()
+	 * will be called to return the pages.
+	 */
+	int (*get_pages)(unsigned long addr, size_t size, int write, int force,
+			 struct sg_table *sg_head, void *client_context,
+			 u64 core_context);
+	/**
+	 * dma_map - Create a DMA mapped sg_table
+	 *
+	 * @sg_head - The sg_table to allocate
+	 * @client_context - Value returned by acquire()
+	 * @dma_device - The device that will be doing DMA from these addresses
+	 * @dmasync - Obsolete, always 0
+	 * @nmap - Returns the number of dma mapped entries in the sg_head
+	 *
+	 * Must be called after get_pages(). This must fill in the sg_head with
+	 * DMA mapped SGLs for dma_device. Each SGL start and end must meet a
+	 * minimum alignment of at least PAGE_SIZE, though individual sgls can
+	 * be multiples of PAGE_SIZE, in any mixture. Since the user virtual
+	 * address/size are not page aligned, the implementation must increase
+	 * it to the logical alignment when building the SGLs.
+	 *
+	 * Returns 0 on success, -ERRNO on failure. After success dma_unmap()
+	 * will be called to unmap the pages. On failure sg_head must be left
+	 * untouched or point to a valid sg_table.
+	 */
+	int (*dma_map)(struct sg_table *sg_head, void *client_context,
+		       struct device *dma_device, int dmasync, int *nmap);
+	/**
+	 * dma_unmap - Unmap a DMA mapped sg_table
+	 *
+	 * @sg_head - The sg_table to unmap
+	 * @client_context - Value returned by acquire()
+	 * @dma_device - The device that will be doing DMA from these addresses
+	 *
+	 * sg_head will not be touched after this function returns.
+	 *
+	 * Must return 0.
+	 */
+	int (*dma_unmap)(struct sg_table *sg_head, void *client_context,
+			 struct device *dma_device);
+	/**
+	 * put_pages - Unpin a SGL
+	 *
+	 * @sg_head - The sg_table to unpin
+	 * @client_context - Value returned by acquire()
+	 *
+	 * sg_head must be freed on return.
+	 */
+	void (*put_pages)(struct sg_table *sg_head, void *client_context);
+	/*
+	 * get_page_size - Return the peer's native page size for this
+	 * acquire. Normally PAGE_SIZE; the core trims the mapped SGLs when
+	 * a larger size is returned.
+	 */
+	unsigned long (*get_page_size)(void *client_context);
+	/**
+	 * release - Undo acquire
+	 *
+	 * @client_context - Value returned by acquire()
+	 *
+	 * If acquire() returns 1 then release() must be called. All
+	 * get_pages() and dma_map()'s must be undone before calling this
+	 * function.
+	 */
+	void (*release)(void *client_context);
+};
+
+/*
+ * If invalidate_callback() is non-NULL then the client will only support
+ * umems which can be invalidated. The caller may call the
+ * invalidate_callback() at any time after acquire(); on return, the range
+ * will no longer have DMA active and release() will have been called.
+ *
+ * Note: The implementation locking must ensure that get_pages() and
+ * dma_map() do not have locking dependencies with invalidate_callback().
+ * The ib_core will wait until any concurrent get_pages() or dma_map()
+ * completes before returning.
+ *
+ * Similarly, this can call dma_unmap(), put_pages() and release() from
+ * within the callback, or will wait for another thread doing those
+ * operations to complete.
+ *
+ * For these reasons the user of invalidate_callback() must be careful with
+ * locking.
+ */
+typedef int (*invalidate_peer_memory)(void *reg_handle, u64 core_context);
+
+void *
+ib_register_peer_memory_client(const struct peer_memory_client *peer_client,
+ invalidate_peer_memory *invalidate_callback);
+void ib_unregister_peer_memory_client(void *reg_handle);
+
+#endif