__dma_page_cpu_to_dev(page, offset, size, dir);
}
- struct dma_map_ops arm_dma_ops = {
+ const struct dma_map_ops arm_dma_ops = {
.alloc = arm_dma_alloc,
.free = arm_dma_free,
.mmap = arm_dma_mmap,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs);
- struct dma_map_ops arm_coherent_dma_ops = {
+ const struct dma_map_ops arm_coherent_dma_ops = {
.alloc = arm_coherent_dma_alloc,
.free = arm_coherent_dma_free,
.mmap = arm_coherent_dma_mmap,
static void *__alloc_from_contiguous(struct device *dev, size_t size,
pgprot_t prot, struct page **ret_page,
const void *caller, bool want_vaddr,
- int coherent_flag);
+ int coherent_flag, gfp_t gfp);
static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
pgprot_t prot, struct page **ret_page,
*/
if (dev_get_cma_area(NULL))
ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
- &page, atomic_pool_init, true, NORMAL);
+ &page, atomic_pool_init, true, NORMAL,
+ GFP_KERNEL);
else
ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
&page, atomic_pool_init, true);
static void *__alloc_from_contiguous(struct device *dev, size_t size,
pgprot_t prot, struct page **ret_page,
const void *caller, bool want_vaddr,
- int coherent_flag)
+ int coherent_flag, gfp_t gfp)
{
unsigned long order = get_order(size);
size_t count = size >> PAGE_SHIFT;
struct page *page;
void *ptr = NULL;
- page = dma_alloc_from_contiguous(dev, count, order);
+ page = dma_alloc_from_contiguous(dev, count, order, gfp);
if (!page)
return NULL;
#define __get_dma_pgprot(attrs, prot) __pgprot(0)
#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL
#define __alloc_from_pool(size, ret_page) NULL
-#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag) NULL
+#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag, gfp) NULL
#define __free_from_pool(cpu_addr, size) do { } while (0)
#define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0)
#define __dma_free_remap(cpu_addr, size) do { } while (0)
{
return __alloc_from_contiguous(args->dev, args->size, args->prot,
ret_page, args->caller,
- args->want_vaddr, args->coherent_flag);
+ args->want_vaddr, args->coherent_flag,
+ args->gfp);
}
static void cma_allocator_free(struct arm_dma_free_args *args)
int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
enum dma_data_direction dir, unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
struct scatterlist *s;
int i, j;
void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
enum dma_data_direction dir, unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
struct scatterlist *s;
int i;
void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
struct scatterlist *s;
int i;
void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
struct scatterlist *s;
int i;
#ifdef CONFIG_ARM_DMA_USE_IOMMU
+static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs)
+{
+ int prot = 0;
+
+ if (attrs & DMA_ATTR_PRIVILEGED)
+ prot |= IOMMU_PRIV;
+
+ switch (dir) {
+ case DMA_BIDIRECTIONAL:
+ return prot | IOMMU_READ | IOMMU_WRITE;
+ case DMA_TO_DEVICE:
+ return prot | IOMMU_READ;
+ case DMA_FROM_DEVICE:
+ return prot | IOMMU_WRITE;
+ default:
+ return prot;
+ }
+}
+
/* IOMMU */
static int extend_iommu_mapping(struct dma_iommu_mapping *mapping);
unsigned long order = get_order(size);
struct page *page;
- page = dma_alloc_from_contiguous(dev, count, order);
+ page = dma_alloc_from_contiguous(dev, count, order, gfp);
if (!page)
goto error;
* Create a mapping in device IO address space for specified pages
*/
static dma_addr_t
-__iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
+__iommu_create_mapping(struct device *dev, struct page **pages, size_t size,
+ unsigned long attrs)
{
struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
len = (j - i) << PAGE_SHIFT;
ret = iommu_map(mapping->domain, iova, phys, len,
- IOMMU_READ|IOMMU_WRITE);
+ __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs));
if (ret < 0)
goto fail;
iova += len;
}
static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp,
- dma_addr_t *handle, int coherent_flag)
+ dma_addr_t *handle, int coherent_flag,
+ unsigned long attrs)
{
struct page *page;
void *addr;
if (!addr)
return NULL;
- *handle = __iommu_create_mapping(dev, &page, size);
+ *handle = __iommu_create_mapping(dev, &page, size, attrs);
if (*handle == DMA_ERROR_CODE)
goto err_mapping;
if (coherent_flag == COHERENT || !gfpflags_allow_blocking(gfp))
return __iommu_alloc_simple(dev, size, gfp, handle,
- coherent_flag);
+ coherent_flag, attrs);
/*
* Following is a work-around (a.k.a. hack) to prevent pages
if (!pages)
return NULL;
- *handle = __iommu_create_mapping(dev, pages, size);
+ *handle = __iommu_create_mapping(dev, pages, size, attrs);
if (*handle == DMA_ERROR_CODE)
goto err_buffer;
GFP_KERNEL);
}
-static int __dma_direction_to_prot(enum dma_data_direction dir)
-{
- int prot;
-
- switch (dir) {
- case DMA_BIDIRECTIONAL:
- prot = IOMMU_READ | IOMMU_WRITE;
- break;
- case DMA_TO_DEVICE:
- prot = IOMMU_READ;
- break;
- case DMA_FROM_DEVICE:
- prot = IOMMU_WRITE;
- break;
- default:
- prot = 0;
- }
-
- return prot;
-}
-
/*
* Map a part of the scatter-gather list into contiguous io address space
*/
if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
- prot = __dma_direction_to_prot(dir);
+ prot = __dma_info_to_prot(dir, attrs);
ret = iommu_map(mapping->domain, iova, phys, len, prot);
if (ret < 0)
if (dma_addr == DMA_ERROR_CODE)
return dma_addr;
- prot = __dma_direction_to_prot(dir);
+ prot = __dma_info_to_prot(dir, attrs);
ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot);
if (ret < 0)
if (dma_addr == DMA_ERROR_CODE)
return dma_addr;
- prot = __dma_direction_to_prot(dir) | IOMMU_MMIO;
+ prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO;
ret = iommu_map(mapping->domain, dma_addr, addr, len, prot);
if (ret < 0)
__dma_page_cpu_to_dev(page, offset, size, dir);
}
- struct dma_map_ops iommu_ops = {
+ const struct dma_map_ops iommu_ops = {
.alloc = arm_iommu_alloc_attrs,
.free = arm_iommu_free_attrs,
.mmap = arm_iommu_mmap_attrs,
.unmap_resource = arm_iommu_unmap_resource,
};
- struct dma_map_ops iommu_coherent_ops = {
+ const struct dma_map_ops iommu_coherent_ops = {
.alloc = arm_coherent_iommu_alloc_attrs,
.free = arm_coherent_iommu_free_attrs,
.mmap = arm_coherent_iommu_mmap_attrs,
}
EXPORT_SYMBOL_GPL(arm_iommu_detach_device);
- static struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
+ static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
{
return coherent ? &iommu_coherent_ops : &iommu_ops;
}
#endif /* CONFIG_ARM_DMA_USE_IOMMU */
- static struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
+ static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
{
return coherent ? &arm_coherent_dma_ops : &arm_dma_ops;
}
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent)
{
- struct dma_map_ops *dma_ops;
+ const struct dma_map_ops *dma_ops;
dev->archdata.dma_coherent = coherent;
if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu))
void *addr;
page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
- get_order(size));
+ get_order(size), flags);
if (!page)
return NULL;
dma_addr_t dev_addr;
dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
- if (!is_device_dma_coherent(dev))
+ if (!is_device_dma_coherent(dev) &&
+ (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
return dev_addr;
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
- if (!is_device_dma_coherent(dev))
+ if (!is_device_dma_coherent(dev) &&
+ (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
}
int i, ret;
ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
- if (!is_device_dma_coherent(dev))
+ if (!is_device_dma_coherent(dev) &&
+ (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
for_each_sg(sgl, sg, ret, i)
__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
sg->length, dir);
struct scatterlist *sg;
int i;
- if (!is_device_dma_coherent(dev))
+ if (!is_device_dma_coherent(dev) &&
+ (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
for_each_sg(sgl, sg, nelems, i)
__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
sg->length, dir);
return 1;
}
- static struct dma_map_ops swiotlb_dma_ops = {
+static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr)
+{
+ if (swiotlb)
+ return swiotlb_dma_mapping_error(hwdev, addr);
+ return 0;
+}
+
+ static const struct dma_map_ops swiotlb_dma_ops = {
.alloc = __dma_alloc,
.free = __dma_free,
.mmap = __swiotlb_mmap,
.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
.sync_sg_for_device = __swiotlb_sync_sg_for_device,
.dma_supported = __swiotlb_dma_supported,
- .mapping_error = swiotlb_dma_mapping_error,
+ .mapping_error = __swiotlb_dma_mapping_error,
};
static int __init atomic_pool_init(void)
if (dev_get_cma_area(NULL))
page = dma_alloc_from_contiguous(NULL, nr_pages,
- pool_size_order);
+ pool_size_order, GFP_KERNEL);
else
page = alloc_pages(GFP_DMA, pool_size_order);
return 0;
}
- struct dma_map_ops dummy_dma_ops = {
+ const struct dma_map_ops dummy_dma_ops = {
.alloc = __dummy_alloc,
.free = __dummy_free,
.mmap = __dummy_mmap,
unsigned long attrs)
{
bool coherent = is_device_dma_coherent(dev);
- int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
+ int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
size_t iosize = size;
void *addr;
unsigned long attrs)
{
bool coherent = is_device_dma_coherent(dev);
- int prot = dma_direction_to_prot(dir, coherent);
+ int prot = dma_info_to_prot(dir, coherent, attrs);
dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
if (!iommu_dma_mapping_error(dev, dev_addr) &&
__iommu_sync_sg_for_device(dev, sgl, nelems, dir);
return iommu_dma_map_sg(dev, sgl, nelems,
- dma_direction_to_prot(dir, coherent));
+ dma_info_to_prot(dir, coherent, attrs));
}
static void __iommu_unmap_sg_attrs(struct device *dev,
iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
}
- static struct dma_map_ops iommu_dma_ops = {
+ static const struct dma_map_ops iommu_dma_ops = {
.alloc = __iommu_alloc_attrs,
.free = __iommu_free_attrs,
.mmap = __iommu_mmap_attrs,
.sync_sg_for_device = __iommu_sync_sg_for_device,
.map_resource = iommu_dma_map_resource,
.unmap_resource = iommu_dma_unmap_resource,
- .dma_supported = iommu_dma_supported,
.mapping_error = iommu_dma_mapping_error,
};
* then the IOMMU core will have already configured a group for this
* device, and allocated the default domain for that group.
*/
- if (!domain || iommu_dma_init_domain(domain, dma_base, size, dev)) {
- pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
- dev_name(dev));
- return false;
+ if (!domain)
+ goto out_err;
+
+ if (domain->type == IOMMU_DOMAIN_DMA) {
+ if (iommu_dma_init_domain(domain, dma_base, size, dev))
+ goto out_err;
+
- dev->archdata.dma_ops = &iommu_dma_ops;
++ dev->dma_ops = &iommu_dma_ops;
}
- dev->dma_ops = &iommu_dma_ops;
return true;
+out_err:
+ pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
+ dev_name(dev));
+ return false;
}
static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
void arch_teardown_dma_ops(struct device *dev)
{
- dev->archdata.dma_ops = NULL;
+ dev->dma_ops = NULL;
}
#else
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent)
{
- if (!dev->archdata.dma_ops)
- dev->archdata.dma_ops = &swiotlb_dma_ops;
+ if (!dev->dma_ops)
+ dev->dma_ops = &swiotlb_dma_ops;
dev->archdata.dma_coherent = coherent;
__iommu_setup_dma_ops(dev, dma_base, size, iommu);
cache_clear(handle, size);
break;
default:
- if (printk_ratelimit())
- printk("dma_sync_single_for_device: unsupported dir %u\n", dir);
+ pr_err_ratelimited("dma_sync_single_for_device: unsupported dir %u\n",
+ dir);
break;
}
}
return nents;
}
- struct dma_map_ops m68k_dma_ops = {
+ const struct dma_map_ops m68k_dma_ops = {
.alloc = m68k_dma_alloc,
.free = m68k_dma_free,
.map_page = m68k_dma_map_page,
/* ignore region specifiers */
gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
-#ifdef CONFIG_ZONE_DMA
- if (dev == NULL)
+ if (IS_ENABLED(CONFIG_ZONE_DMA) && dev == NULL)
gfp |= __GFP_DMA;
- else if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
+ else if (IS_ENABLED(CONFIG_ZONE_DMA) &&
+ dev->coherent_dma_mask <= DMA_BIT_MASK(24))
gfp |= __GFP_DMA;
- else
-#endif
-#ifdef CONFIG_ZONE_DMA32
- if (dev->coherent_dma_mask <= DMA_BIT_MASK(32))
+ else if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
+ dev->coherent_dma_mask <= DMA_BIT_MASK(32))
gfp |= __GFP_DMA32;
- else
-#endif
- ;
/* Don't invoke OOM killer */
gfp |= __GFP_NORETRY;
}
struct octeon_dma_map_ops {
- struct dma_map_ops dma_map_ops;
+ const struct dma_map_ops dma_map_ops;
dma_addr_t (*phys_to_dma)(struct device *dev, phys_addr_t paddr);
phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr);
};
},
};
- struct dma_map_ops *octeon_pci_dma_map_ops;
+ const struct dma_map_ops *octeon_pci_dma_map_ops;
void __init octeon_pci_dma_init(void)
{
/* ignore region specifiers */
gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
-#ifdef CONFIG_ISA
- if (dev == NULL)
+ if ((IS_ENABLED(CONFIG_ISA) && dev == NULL) ||
+ (IS_ENABLED(CONFIG_ZONE_DMA) &&
+ dev->coherent_dma_mask < DMA_BIT_MASK(32)))
gfp |= __GFP_DMA;
- else
-#endif
-#ifdef CONFIG_ZONE_DMA
- if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
- gfp |= __GFP_DMA;
- else
-#endif
-#ifdef CONFIG_ZONE_DMA32
- if (dev->coherent_dma_mask < DMA_BIT_MASK(40))
+ else if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
+ dev->coherent_dma_mask < DMA_BIT_MASK(40))
gfp |= __GFP_DMA32;
- else
-#endif
- ;
+
gfp |= __GFP_NORETRY;
ret = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
return daddr;
}
- static struct dma_map_ops loongson_dma_map_ops = {
+ static const struct dma_map_ops loongson_dma_map_ops = {
.alloc = loongson_dma_alloc_coherent,
.free = loongson_dma_free_coherent,
.map_page = loongson_dma_map_page,
gfp = massage_gfp_flags(dev, gfp);
if (IS_ENABLED(CONFIG_DMA_CMA) && gfpflags_allow_blocking(gfp))
- page = dma_alloc_from_contiguous(dev,
- count, get_order(size));
+ page = dma_alloc_from_contiguous(dev, count, get_order(size),
+ gfp);
if (!page)
page = alloc_pages(gfp, get_order(size));
EXPORT_SYMBOL(dma_cache_sync);
- static struct dma_map_ops mips_default_dma_map_ops = {
+ static const struct dma_map_ops mips_default_dma_map_ops = {
.alloc = mips_dma_alloc_coherent,
.free = mips_dma_free_coherent,
.mmap = mips_dma_mmap,
.dma_supported = mips_dma_supported
};
- struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops;
+ const struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops;
EXPORT_SYMBOL(mips_dma_map_ops);
#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
#include <linux/of_address.h>
#include <linux/of_pci.h>
#include <linux/mm.h>
+#include <linux/shmem_fs.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/irq.h>
EXPORT_SYMBOL(isa_mem_base);
- static struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
+ static const struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
- void set_pci_dma_ops(struct dma_map_ops *dma_ops)
+ void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
{
pci_dma_ops = dma_ops;
}
- struct dma_map_ops *get_pci_dma_ops(void)
+ const struct dma_map_ops *get_pci_dma_ops(void)
{
return pci_dma_ops;
}
config ARCH_SUPPORTS_UPROBES
def_bool y
-config DEBUG_RODATA
- def_bool y
-
config S390
def_bool y
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
+ select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
+ select ARCH_HAS_STRICT_KERNEL_RWX
+ select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_INLINE_READ_LOCK
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
+ select DMA_NOOP_OPS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
rc = zpci_load(&data, req, offset);
if (!rc) {
- data = data << ((8 - len) * 8);
- data = le64_to_cpu(data);
+ data = le64_to_cpu((__force __le64) data);
+ data >>= (8 - len) * 8;
*val = (u32) data;
} else
*val = 0xffffffff;
u64 data = val;
int rc;
- data = cpu_to_le64(data);
- data = data >> ((8 - len) * 8);
+ data <<= (8 - len) * 8;
+ data = (__force u64) cpu_to_le64(data);
rc = zpci_store(data, req, offset);
return rc;
}
int i;
pdev->dev.groups = zpci_attr_groups;
- pdev->dev.archdata.dma_ops = &s390_pci_dma_ops;
+ pdev->dev.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev);
for (i = 0; i < PCI_BAR_COUNT; i++) {
/* were we called with bad_dma_address? */
badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE);
- if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) {
+ if (unlikely(dma_addr < badend)) {
WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
"address 0x%Lx\n", dma_addr);
return;
free_pages((unsigned long)vaddr, get_order(size));
}
- static struct dma_map_ops calgary_dma_ops = {
+ static const struct dma_map_ops calgary_dma_ops = {
.alloc = calgary_alloc_coherent,
.free = calgary_free_coherent,
.map_sg = calgary_map_sg,
tbl = find_iommu_table(&dev->dev);
if (translation_enabled(tbl))
- dev->dev.archdata.dma_ops = &calgary_dma_ops;
+ dev->dev.dma_ops = &calgary_dma_ops;
}
return ret;
calgary_disable_translation(dev);
calgary_free_bus(dev);
pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
- dev->dev.archdata.dma_ops = NULL;
+ dev->dev.dma_ops = NULL;
} while (1);
return ret;
static int forbid_dac __read_mostly;
- struct dma_map_ops *dma_ops = &nommu_dma_ops;
+ const struct dma_map_ops *dma_ops = &nommu_dma_ops;
EXPORT_SYMBOL(dma_ops);
static int iommu_sac_force __read_mostly;
page = NULL;
/* CMA can be used only in the context which permits sleeping */
if (gfpflags_allow_blocking(flag)) {
- page = dma_alloc_from_contiguous(dev, count, get_order(size));
+ page = dma_alloc_from_contiguous(dev, count, get_order(size),
+ flag);
if (page && page_to_phys(page) + size > dma_mask) {
dma_release_from_contiguous(dev, page, count);
page = NULL;
int dma_supported(struct device *dev, u64 mask)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
#ifdef CONFIG_PCI
if (mask > 0xffffffff && forbid_dac > 0) {
flag |= GFP_DMA;
if (gfpflags_allow_blocking(flag))
- page = dma_alloc_from_contiguous(dev, count, get_order(size));
+ page = dma_alloc_from_contiguous(dev, count, get_order(size),
+ flag);
if (!page)
page = alloc_pages(flag, get_order(size));
return 0;
}
- struct dma_map_ops xtensa_dma_map_ops = {
+ const struct dma_map_ops xtensa_dma_map_ops = {
.alloc = xtensa_dma_alloc,
.free = xtensa_dma_free,
.map_page = xtensa_map_page,
int ret;
struct ib_client *client;
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
+ struct device *parent = device->dev.parent;
+
+ WARN_ON_ONCE(!parent);
+ if (!device->dev.dma_ops)
+ device->dev.dma_ops = parent->dma_ops;
+ if (!device->dev.dma_mask)
+ device->dev.dma_mask = parent->dma_mask;
+ if (!device->dev.coherent_dma_mask)
+ device->dev.coherent_dma_mask = parent->coherent_dma_mask;
mutex_lock(&device_mutex);
union ib_gid gid;
int err;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
memset(port_attr, 0, sizeof(*port_attr));
if (!device->modify_port)
return -ENOSYS;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
return device->modify_port(device, port_num, port_modify_mask,
--- /dev/null
- ibdev->dma_device = &rdev->en_dev->pdev->dev;
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Main component of the bnxt_re driver
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <net/dcbnl.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <linux/if_ether.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+
+#include "bnxt_ulp.h"
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+#include "qplib_rcfw.h"
+#include "bnxt_re.h"
+#include "ib_verbs.h"
+#include <rdma/bnxt_re-abi.h>
+#include "bnxt.h"
+static char version[] =
+ BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
+
+MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
+MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(ROCE_DRV_MODULE_VERSION);
+
+/* globals */
+static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
+/* Mutex to protect the list of bnxt_re devices added */
+static DEFINE_MUTEX(bnxt_re_dev_lock);
+static struct workqueue_struct *bnxt_re_wq;
+
+/* for handling bnxt_en callbacks later */
+static void bnxt_re_stop(void *p)
+{
+}
+
+static void bnxt_re_start(void *p)
+{
+}
+
+static void bnxt_re_sriov_config(void *p, int num_vfs)
+{
+}
+
+static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
+ .ulp_async_notifier = NULL,
+ .ulp_stop = bnxt_re_stop,
+ .ulp_start = bnxt_re_start,
+ .ulp_sriov_config = bnxt_re_sriov_config
+};
+
+/* RoCE -> Net driver */
+
+/* Driver registration routines used to let the networking driver (bnxt_en)
+ * to know that the RoCE driver is now installed
+ */
+static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+ struct bnxt_en_dev *en_dev;
+ int rc;
+
+ if (!rdev)
+ return -EINVAL;
+
+ en_dev = rdev->en_dev;
+ /* Acquire rtnl lock if it is not invokded from netdev event */
+ if (lock_wait)
+ rtnl_lock();
+
+ rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev,
+ BNXT_ROCE_ULP);
+ if (lock_wait)
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev;
+ int rc = 0;
+
+ if (!rdev)
+ return -EINVAL;
+
+ en_dev = rdev->en_dev;
+
+ rtnl_lock();
+ rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP,
+ &bnxt_re_ulp_ops, rdev);
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+ struct bnxt_en_dev *en_dev;
+ int rc;
+
+ if (!rdev)
+ return -EINVAL;
+
+ en_dev = rdev->en_dev;
+
+ if (lock_wait)
+ rtnl_lock();
+
+ rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP);
+
+ if (lock_wait)
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
+{
+ int rc = 0, num_msix_want = BNXT_RE_MIN_MSIX, num_msix_got;
+ struct bnxt_en_dev *en_dev;
+
+ if (!rdev)
+ return -EINVAL;
+
+ en_dev = rdev->en_dev;
+
+ rtnl_lock();
+ num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP,
+ rdev->msix_entries,
+ num_msix_want);
+ if (num_msix_got < BNXT_RE_MIN_MSIX) {
+ rc = -EINVAL;
+ goto done;
+ }
+ if (num_msix_got != num_msix_want) {
+ dev_warn(rdev_to_dev(rdev),
+ "Requested %d MSI-X vectors, got %d\n",
+ num_msix_want, num_msix_got);
+ }
+ rdev->num_msix = num_msix_got;
+done:
+ rtnl_unlock();
+ return rc;
+}
+
+static void bnxt_re_init_hwrm_hdr(struct bnxt_re_dev *rdev, struct input *hdr,
+ u16 opcd, u16 crid, u16 trid)
+{
+ hdr->req_type = cpu_to_le16(opcd);
+ hdr->cmpl_ring = cpu_to_le16(crid);
+ hdr->target_id = cpu_to_le16(trid);
+}
+
+static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
+ int msg_len, void *resp, int resp_max_len,
+ int timeout)
+{
+ fw_msg->msg = msg;
+ fw_msg->msg_len = msg_len;
+ fw_msg->resp = resp;
+ fw_msg->resp_max_len = resp_max_len;
+ fw_msg->timeout = timeout;
+}
+
+static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id,
+ bool lock_wait)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_ring_free_input req = {0};
+ struct hwrm_ring_free_output resp;
+ struct bnxt_fw_msg fw_msg;
+ bool do_unlock = false;
+ int rc = -EINVAL;
+
+ if (!en_dev)
+ return rc;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ if (lock_wait) {
+ rtnl_lock();
+ do_unlock = true;
+ }
+
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
+ req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
+ req.ring_id = cpu_to_le16(fw_ring_id);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (rc)
+ dev_err(rdev_to_dev(rdev),
+ "Failed to free HW ring:%d :%#x", req.ring_id, rc);
+ if (do_unlock)
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
+ int pages, int type, u32 ring_mask,
+ u32 map_index, u16 *fw_ring_id)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_ring_alloc_input req = {0};
+ struct hwrm_ring_alloc_output resp;
+ struct bnxt_fw_msg fw_msg;
+ int rc = -EINVAL;
+
+ if (!en_dev)
+ return rc;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ rtnl_lock();
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
+ req.enables = 0;
+ req.page_tbl_addr = cpu_to_le64(dma_arr[0]);
+ if (pages > 1) {
+ /* Page size is in log2 units */
+ req.page_size = BNXT_PAGE_SHIFT;
+ req.page_tbl_depth = 1;
+ }
+ req.fbo = 0;
+ /* Association of ring index with doorbell index and MSIX number */
+ req.logical_id = cpu_to_le16(map_index);
+ req.length = cpu_to_le32(ring_mask + 1);
+ req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
+ req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (!rc)
+ *fw_ring_id = le16_to_cpu(resp.ring_id);
+
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
+ u32 fw_stats_ctx_id, bool lock_wait)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_stat_ctx_free_input req = {0};
+ struct bnxt_fw_msg fw_msg;
+ bool do_unlock = false;
+ int rc = -EINVAL;
+
+ if (!en_dev)
+ return rc;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ if (lock_wait) {
+ rtnl_lock();
+ do_unlock = true;
+ }
+
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1);
+ req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&req,
+ sizeof(req), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (rc)
+ dev_err(rdev_to_dev(rdev),
+ "Failed to free HW stats context %#x", rc);
+
+ if (do_unlock)
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
+ dma_addr_t dma_map,
+ u32 *fw_stats_ctx_id)
+{
+ struct hwrm_stat_ctx_alloc_output resp = {0};
+ struct hwrm_stat_ctx_alloc_input req = {0};
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct bnxt_fw_msg fw_msg;
+ int rc = -EINVAL;
+
+ *fw_stats_ctx_id = INVALID_STATS_CTX_ID;
+
+ if (!en_dev)
+ return rc;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ rtnl_lock();
+
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
+ req.update_period_ms = cpu_to_le32(1000);
+ req.stats_dma_addr = cpu_to_le64(dma_map);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (!rc)
+ *fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id);
+
+ rtnl_unlock();
+ return rc;
+}
+
+/* Device */
+
+static bool is_bnxt_re_dev(struct net_device *netdev)
+{
+ struct ethtool_drvinfo drvinfo;
+
+ if (netdev->ethtool_ops && netdev->ethtool_ops->get_drvinfo) {
+ memset(&drvinfo, 0, sizeof(drvinfo));
+ netdev->ethtool_ops->get_drvinfo(netdev, &drvinfo);
+
+ if (strcmp(drvinfo.driver, "bnxt_en"))
+ return false;
+ return true;
+ }
+ return false;
+}
+
+static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
+{
+ struct bnxt_re_dev *rdev;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rdev, &bnxt_re_dev_list, list) {
+ if (rdev->netdev == netdev) {
+ rcu_read_unlock();
+ return rdev;
+ }
+ }
+ rcu_read_unlock();
+ return NULL;
+}
+
+static void bnxt_re_dev_unprobe(struct net_device *netdev,
+ struct bnxt_en_dev *en_dev)
+{
+ dev_put(netdev);
+ module_put(en_dev->pdev->driver->driver.owner);
+}
+
+static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
+{
+ struct bnxt *bp = netdev_priv(netdev);
+ struct bnxt_en_dev *en_dev;
+ struct pci_dev *pdev;
+
+ /* Call bnxt_en's RoCE probe via indirect API */
+ if (!bp->ulp_probe)
+ return ERR_PTR(-EINVAL);
+
+ en_dev = bp->ulp_probe(netdev);
+ if (IS_ERR(en_dev))
+ return en_dev;
+
+ pdev = en_dev->pdev;
+ if (!pdev)
+ return ERR_PTR(-EINVAL);
+
+ if (!(en_dev->flags & BNXT_EN_FLAG_ROCE_CAP)) {
+ dev_dbg(&pdev->dev,
+ "%s: probe error: RoCE is not supported on this device",
+ ROCE_DRV_MODULE_NAME);
+ return ERR_PTR(-ENODEV);
+ }
+
+ /* Bump net device reference count */
+ if (!try_module_get(pdev->driver->driver.owner))
+ return ERR_PTR(-ENODEV);
+
+ dev_hold(netdev);
+
+ return en_dev;
+}
+
+static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
+{
+ ib_unregister_device(&rdev->ibdev);
+}
+
+static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
+{
+ struct ib_device *ibdev = &rdev->ibdev;
+
+ /* ib device init */
+ ibdev->owner = THIS_MODULE;
+ ibdev->node_type = RDMA_NODE_IB_CA;
+ strlcpy(ibdev->name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
+ strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
+ strlen(BNXT_RE_DESC) + 5);
+ ibdev->phys_port_cnt = 1;
+
+ bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid);
+
+ ibdev->num_comp_vectors = 1;
++ ibdev->dev.parent = &rdev->en_dev->pdev->dev;
+ ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
+
+ /* User space */
+ ibdev->uverbs_abi_ver = BNXT_RE_ABI_VERSION;
+ ibdev->uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_REREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
+ /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */
+
+ /* Kernel verbs */
+ ibdev->query_device = bnxt_re_query_device;
+ ibdev->modify_device = bnxt_re_modify_device;
+
+ ibdev->query_port = bnxt_re_query_port;
+ ibdev->modify_port = bnxt_re_modify_port;
+ ibdev->get_port_immutable = bnxt_re_get_port_immutable;
+ ibdev->query_pkey = bnxt_re_query_pkey;
+ ibdev->query_gid = bnxt_re_query_gid;
+ ibdev->get_netdev = bnxt_re_get_netdev;
+ ibdev->add_gid = bnxt_re_add_gid;
+ ibdev->del_gid = bnxt_re_del_gid;
+ ibdev->get_link_layer = bnxt_re_get_link_layer;
+
+ ibdev->alloc_pd = bnxt_re_alloc_pd;
+ ibdev->dealloc_pd = bnxt_re_dealloc_pd;
+
+ ibdev->create_ah = bnxt_re_create_ah;
+ ibdev->modify_ah = bnxt_re_modify_ah;
+ ibdev->query_ah = bnxt_re_query_ah;
+ ibdev->destroy_ah = bnxt_re_destroy_ah;
+
+ ibdev->create_qp = bnxt_re_create_qp;
+ ibdev->modify_qp = bnxt_re_modify_qp;
+ ibdev->query_qp = bnxt_re_query_qp;
+ ibdev->destroy_qp = bnxt_re_destroy_qp;
+
+ ibdev->post_send = bnxt_re_post_send;
+ ibdev->post_recv = bnxt_re_post_recv;
+
+ ibdev->create_cq = bnxt_re_create_cq;
+ ibdev->destroy_cq = bnxt_re_destroy_cq;
+ ibdev->poll_cq = bnxt_re_poll_cq;
+ ibdev->req_notify_cq = bnxt_re_req_notify_cq;
+
+ ibdev->get_dma_mr = bnxt_re_get_dma_mr;
+ ibdev->dereg_mr = bnxt_re_dereg_mr;
+ ibdev->alloc_mr = bnxt_re_alloc_mr;
+ ibdev->map_mr_sg = bnxt_re_map_mr_sg;
+ ibdev->alloc_fmr = bnxt_re_alloc_fmr;
+ ibdev->map_phys_fmr = bnxt_re_map_phys_fmr;
+ ibdev->unmap_fmr = bnxt_re_unmap_fmr;
+ ibdev->dealloc_fmr = bnxt_re_dealloc_fmr;
+
+ ibdev->reg_user_mr = bnxt_re_reg_user_mr;
+ ibdev->alloc_ucontext = bnxt_re_alloc_ucontext;
+ ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext;
+ ibdev->mmap = bnxt_re_mmap;
+
+ return ib_register_device(ibdev, NULL);
+}
+
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
+}
+
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->dev_attr.fw_ver);
+}
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
+}
+
+static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
+static DEVICE_ATTR(fw_rev, 0444, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
+
+static struct device_attribute *bnxt_re_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_rev,
+ &dev_attr_hca_type
+};
+
+static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
+{
+ dev_put(rdev->netdev);
+ rdev->netdev = NULL;
+
+ mutex_lock(&bnxt_re_dev_lock);
+ list_del_rcu(&rdev->list);
+ mutex_unlock(&bnxt_re_dev_lock);
+
+ synchronize_rcu();
+ flush_workqueue(bnxt_re_wq);
+
+ ib_dealloc_device(&rdev->ibdev);
+ /* rdev is gone */
+}
+
+static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
+ struct bnxt_en_dev *en_dev)
+{
+ struct bnxt_re_dev *rdev;
+
+ /* Allocate bnxt_re_dev instance here */
+ rdev = (struct bnxt_re_dev *)ib_alloc_device(sizeof(*rdev));
+ if (!rdev) {
+ dev_err(NULL, "%s: bnxt_re_dev allocation failure!",
+ ROCE_DRV_MODULE_NAME);
+ return NULL;
+ }
+ /* Default values */
+ rdev->netdev = netdev;
+ dev_hold(rdev->netdev);
+ rdev->en_dev = en_dev;
+ rdev->id = rdev->en_dev->pdev->devfn;
+ INIT_LIST_HEAD(&rdev->qp_list);
+ mutex_init(&rdev->qp_lock);
+ atomic_set(&rdev->qp_count, 0);
+ atomic_set(&rdev->cq_count, 0);
+ atomic_set(&rdev->srq_count, 0);
+ atomic_set(&rdev->mr_count, 0);
+ atomic_set(&rdev->mw_count, 0);
+ rdev->cosq[0] = 0xFFFF;
+ rdev->cosq[1] = 0xFFFF;
+
+ mutex_lock(&bnxt_re_dev_lock);
+ list_add_tail_rcu(&rdev->list, &bnxt_re_dev_list);
+ mutex_unlock(&bnxt_re_dev_lock);
+ return rdev;
+}
+
+static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
+ struct creq_func_event *aeqe)
+{
+ switch (aeqe->event) {
+ case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_cq *handle)
+{
+ struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq,
+ qplib_cq);
+
+ if (!cq) {
+ dev_err(NULL, "%s: CQ is NULL, CQN not handled",
+ ROCE_DRV_MODULE_NAME);
+ return -EINVAL;
+ }
+ if (cq->ib_cq.comp_handler) {
+ /* Lock comp_handler? */
+ (*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context);
+ }
+
+ return 0;
+}
+
+static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
+{
+ if (rdev->nq.hwq.max_elements)
+ bnxt_qplib_disable_nq(&rdev->nq);
+
+ if (rdev->qplib_res.rcfw)
+ bnxt_qplib_cleanup_res(&rdev->qplib_res);
+}
+
+static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
+{
+ int rc = 0;
+
+ bnxt_qplib_init_res(&rdev->qplib_res);
+
+ if (rdev->msix_entries[BNXT_RE_NQ_IDX].vector <= 0)
+ return -EINVAL;
+
+ rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq,
+ rdev->msix_entries[BNXT_RE_NQ_IDX].vector,
+ rdev->msix_entries[BNXT_RE_NQ_IDX].db_offset,
+ &bnxt_re_cqn_handler,
+ NULL);
+
+ if (rc)
+ dev_err(rdev_to_dev(rdev), "Failed to enable NQ: %#x", rc);
+
+ return rc;
+}
+
+static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+ if (rdev->nq.hwq.max_elements) {
+ bnxt_re_net_ring_free(rdev, rdev->nq.ring_id, lock_wait);
+ bnxt_qplib_free_nq(&rdev->nq);
+ }
+ if (rdev->qplib_res.dpi_tbl.max) {
+ bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
+ &rdev->qplib_res.dpi_tbl,
+ &rdev->dpi_privileged);
+ }
+ if (rdev->qplib_res.rcfw) {
+ bnxt_qplib_free_res(&rdev->qplib_res);
+ rdev->qplib_res.rcfw = NULL;
+ }
+}
+
+static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
+{
+ int rc = 0;
+
+ /* Configure and allocate resources for qplib */
+ rdev->qplib_res.rcfw = &rdev->rcfw;
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev,
+ rdev->netdev, &rdev->dev_attr);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
+ &rdev->dpi_privileged,
+ rdev);
+ if (rc)
+ goto fail;
+
+ rdev->nq.hwq.max_elements = BNXT_RE_MAX_CQ_COUNT +
+ BNXT_RE_MAX_SRQC_COUNT + 2;
+ rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to allocate NQ memory: %#x", rc);
+ goto fail;
+ }
+ rc = bnxt_re_net_ring_alloc
+ (rdev, rdev->nq.hwq.pbl[PBL_LVL_0].pg_map_arr,
+ rdev->nq.hwq.pbl[rdev->nq.hwq.level].pg_count,
+ HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_NQE_MAX_CNT - 1,
+ rdev->msix_entries[BNXT_RE_NQ_IDX].ring_idx,
+ &rdev->nq.ring_id);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to allocate NQ ring: %#x", rc);
+ goto free_nq;
+ }
+ return 0;
+free_nq:
+ bnxt_qplib_free_nq(&rdev->nq);
+fail:
+ rdev->qplib_res.rcfw = NULL;
+ return rc;
+}
+
+static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
+ u8 port_num, enum ib_event_type event)
+{
+ struct ib_event ib_event;
+
+ ib_event.device = ibdev;
+ if (qp)
+ ib_event.element.qp = qp;
+ else
+ ib_event.element.port_num = port_num;
+ ib_event.event = event;
+ ib_dispatch_event(&ib_event);
+}
+
+#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02
+static int bnxt_re_query_hwrm_pri2cos(struct bnxt_re_dev *rdev, u8 dir,
+ u64 *cid_map)
+{
+ struct hwrm_queue_pri2cos_qcfg_input req = {0};
+ struct bnxt *bp = netdev_priv(rdev->netdev);
+ struct hwrm_queue_pri2cos_qcfg_output resp;
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct bnxt_fw_msg fw_msg;
+ u32 flags = 0;
+ u8 *qcfgmap, *tmp_map;
+ int rc = 0, i;
+
+ if (!cid_map)
+ return -EINVAL;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
+ HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
+ flags |= (dir & 0x01);
+ flags |= HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN;
+ req.flags = cpu_to_le32(flags);
+ req.port_id = bp->pf.port_id;
+
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (rc)
+ return rc;
+
+ if (resp.queue_cfg_info) {
+ dev_warn(rdev_to_dev(rdev),
+ "Asymmetric cos queue configuration detected");
+ dev_warn(rdev_to_dev(rdev),
+ " on device, QoS may not be fully functional\n");
+ }
+ qcfgmap = &resp.pri0_cos_queue_id;
+ tmp_map = (u8 *)cid_map;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ tmp_map[i] = qcfgmap[i];
+
+ return rc;
+}
+
+static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
+ struct bnxt_re_qp *qp)
+{
+ return (qp->ib_qp.qp_type == IB_QPT_GSI) || (qp == rdev->qp1_sqp);
+}
+
+static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
+{
+ int mask = IB_QP_STATE;
+ struct ib_qp_attr qp_attr;
+ struct bnxt_re_qp *qp;
+
+ qp_attr.qp_state = IB_QPS_ERR;
+ mutex_lock(&rdev->qp_lock);
+ list_for_each_entry(qp, &rdev->qp_list, list) {
+ /* Modify the state of all QPs except QP1/Shadow QP */
+ if (!bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) {
+ if (qp->qplib_qp.state !=
+ CMDQ_MODIFY_QP_NEW_STATE_RESET &&
+ qp->qplib_qp.state !=
+ CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp,
+ 1, IB_EVENT_QP_FATAL);
+ bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, mask,
+ NULL);
+ }
+ }
+ }
+ mutex_unlock(&rdev->qp_lock);
+}
+
+static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
+{
+ u32 prio_map = 0, tmp_map = 0;
+ struct net_device *netdev;
+ struct dcb_app app;
+
+ netdev = rdev->netdev;
+
+ memset(&app, 0, sizeof(app));
+ app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
+ app.protocol = ETH_P_IBOE;
+ tmp_map = dcb_ieee_getapp_mask(netdev, &app);
+ prio_map = tmp_map;
+
+ app.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
+ app.protocol = ROCE_V2_UDP_DPORT;
+ tmp_map = dcb_ieee_getapp_mask(netdev, &app);
+ prio_map |= tmp_map;
+
+ if (!prio_map)
+ prio_map = -EFAULT;
+ return prio_map;
+}
+
+static void bnxt_re_parse_cid_map(u8 prio_map, u8 *cid_map, u16 *cosq)
+{
+ u16 prio;
+ u8 id;
+
+ for (prio = 0, id = 0; prio < 8; prio++) {
+ if (prio_map & (1 << prio)) {
+ cosq[id] = cid_map[prio];
+ id++;
+ if (id == 2) /* Max 2 tcs supported */
+ break;
+ }
+ }
+}
+
+static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
+{
+ u8 prio_map = 0;
+ u64 cid_map;
+ int rc;
+
+ /* Get priority for roce */
+ rc = bnxt_re_get_priority_mask(rdev);
+ if (rc < 0)
+ return rc;
+ prio_map = (u8)rc;
+
+ if (prio_map == rdev->cur_prio_map)
+ return 0;
+ rdev->cur_prio_map = prio_map;
+ /* Get cosq id for this priority */
+ rc = bnxt_re_query_hwrm_pri2cos(rdev, 0, &cid_map);
+ if (rc) {
+ dev_warn(rdev_to_dev(rdev), "no cos for p_mask %x\n", prio_map);
+ return rc;
+ }
+ /* Parse CoS IDs for app priority */
+ bnxt_re_parse_cid_map(prio_map, (u8 *)&cid_map, rdev->cosq);
+
+ /* Config BONO. */
+ rc = bnxt_qplib_map_tc2cos(&rdev->qplib_res, rdev->cosq);
+ if (rc) {
+ dev_warn(rdev_to_dev(rdev), "no tc for cos{%x, %x}\n",
+ rdev->cosq[0], rdev->cosq[1]);
+ return rc;
+ }
+
+ return 0;
+}
+
+static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+ int i, rc;
+
+ if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
+ for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
+ device_remove_file(&rdev->ibdev.dev,
+ bnxt_re_attributes[i]);
+ /* Cleanup ib dev */
+ bnxt_re_unregister_ib(rdev);
+ }
+ if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
+ cancel_delayed_work(&rdev->worker);
+
+ bnxt_re_cleanup_res(rdev);
+ bnxt_re_free_res(rdev, lock_wait);
+
+ if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
+ rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
+ if (rc)
+ dev_warn(rdev_to_dev(rdev),
+ "Failed to deinitialize RCFW: %#x", rc);
+ bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id,
+ lock_wait);
+ bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
+ bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
+ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, lock_wait);
+ bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
+ }
+ if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
+ rc = bnxt_re_free_msix(rdev, lock_wait);
+ if (rc)
+ dev_warn(rdev_to_dev(rdev),
+ "Failed to free MSI-X vectors: %#x", rc);
+ }
+ if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
+ rc = bnxt_re_unregister_netdev(rdev, lock_wait);
+ if (rc)
+ dev_warn(rdev_to_dev(rdev),
+ "Failed to unregister with netdev: %#x", rc);
+ }
+}
+
+static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
+{
+ u32 i;
+
+ rdev->qplib_ctx.qpc_count = BNXT_RE_MAX_QPC_COUNT;
+ rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT;
+ rdev->qplib_ctx.srqc_count = BNXT_RE_MAX_SRQC_COUNT;
+ rdev->qplib_ctx.cq_count = BNXT_RE_MAX_CQ_COUNT;
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+ rdev->qplib_ctx.tqm_count[i] =
+ rdev->dev_attr.tqm_alloc_reqs[i];
+}
+
+/* worker thread for polling periodic events. Now used for QoS programming*/
+static void bnxt_re_worker(struct work_struct *work)
+{
+ struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
+ worker.work);
+
+ bnxt_re_setup_qos(rdev);
+ schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+}
+
+static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
+{
+ int i, j, rc;
+
+ /* Registered a new RoCE device instance to netdev */
+ rc = bnxt_re_register_netdev(rdev);
+ if (rc) {
+ pr_err("Failed to register with netedev: %#x\n", rc);
+ return -EINVAL;
+ }
+ set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+
+ rc = bnxt_re_request_msix(rdev);
+ if (rc) {
+ pr_err("Failed to get MSI-X vectors: %#x\n", rc);
+ rc = -EINVAL;
+ goto fail;
+ }
+ set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);
+
+ /* Establish RCFW Communication Channel to initialize the context
+ * memory for the function and all child VFs
+ */
+ rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_re_net_ring_alloc
+ (rdev, rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr,
+ rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count,
+ HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_CREQE_MAX_CNT - 1,
+ rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx,
+ &rdev->rcfw.creq_ring_id);
+ if (rc) {
+ pr_err("Failed to allocate CREQ: %#x\n", rc);
+ goto free_rcfw;
+ }
+ rc = bnxt_qplib_enable_rcfw_channel
+ (rdev->en_dev->pdev, &rdev->rcfw,
+ rdev->msix_entries[BNXT_RE_AEQ_IDX].vector,
+ rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset,
+ 0, &bnxt_re_aeq_handler);
+ if (rc) {
+ pr_err("Failed to enable RCFW channel: %#x\n", rc);
+ goto free_ring;
+ }
+
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+ if (rc)
+ goto disable_rcfw;
+ bnxt_re_set_resource_limits(rdev);
+
+ rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0);
+ if (rc) {
+ pr_err("Failed to allocate QPLIB context: %#x\n", rc);
+ goto disable_rcfw;
+ }
+ rc = bnxt_re_net_stats_ctx_alloc(rdev,
+ rdev->qplib_ctx.stats.dma_map,
+ &rdev->qplib_ctx.stats.fw_id);
+ if (rc) {
+ pr_err("Failed to allocate stats context: %#x\n", rc);
+ goto free_ctx;
+ }
+
+ rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx, 0);
+ if (rc) {
+ pr_err("Failed to initialize RCFW: %#x\n", rc);
+ goto free_sctx;
+ }
+ set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
+
+ /* Resources based on the 'new' device caps */
+ rc = bnxt_re_alloc_res(rdev);
+ if (rc) {
+ pr_err("Failed to allocate resources: %#x\n", rc);
+ goto fail;
+ }
+ rc = bnxt_re_init_res(rdev);
+ if (rc) {
+ pr_err("Failed to initialize resources: %#x\n", rc);
+ goto fail;
+ }
+
+ rc = bnxt_re_setup_qos(rdev);
+ if (rc)
+ pr_info("RoCE priority not yet configured\n");
+
+ INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
+ set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
+ schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+
+ /* Register ib dev */
+ rc = bnxt_re_register_ib(rdev);
+ if (rc) {
+ pr_err("Failed to register with IB: %#x\n", rc);
+ goto fail;
+ }
+ dev_info(rdev_to_dev(rdev), "Device registered successfully");
+ for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
+ rc = device_create_file(&rdev->ibdev.dev,
+ bnxt_re_attributes[i]);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to create IB sysfs: %#x", rc);
+ /* Must clean up all created device files */
+ for (j = 0; j < i; j++)
+ device_remove_file(&rdev->ibdev.dev,
+ bnxt_re_attributes[j]);
+ bnxt_re_unregister_ib(rdev);
+ goto fail;
+ }
+ }
+ set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
+
+ return 0;
+free_sctx:
+ bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id, true);
+free_ctx:
+ bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
+disable_rcfw:
+ bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
+free_ring:
+ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, true);
+free_rcfw:
+ bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
+fail:
+ bnxt_re_ib_unreg(rdev, true);
+ return rc;
+}
+
+static void bnxt_re_dev_unreg(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct net_device *netdev = rdev->netdev;
+
+ bnxt_re_dev_remove(rdev);
+
+ if (netdev)
+ bnxt_re_dev_unprobe(netdev, en_dev);
+}
+
+static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct net_device *netdev)
+{
+ struct bnxt_en_dev *en_dev;
+ int rc = 0;
+
+ if (!is_bnxt_re_dev(netdev))
+ return -ENODEV;
+
+ en_dev = bnxt_re_dev_probe(netdev);
+ if (IS_ERR(en_dev)) {
+ if (en_dev != ERR_PTR(-ENODEV))
+ pr_err("%s: Failed to probe\n", ROCE_DRV_MODULE_NAME);
+ rc = PTR_ERR(en_dev);
+ goto exit;
+ }
+ *rdev = bnxt_re_dev_add(netdev, en_dev);
+ if (!*rdev) {
+ rc = -ENOMEM;
+ bnxt_re_dev_unprobe(netdev, en_dev);
+ goto exit;
+ }
+exit:
+ return rc;
+}
+
+static void bnxt_re_remove_one(struct bnxt_re_dev *rdev)
+{
+ pci_dev_put(rdev->en_dev->pdev);
+}
+
+/* Handle all deferred netevents tasks */
+static void bnxt_re_task(struct work_struct *work)
+{
+ struct bnxt_re_work *re_work;
+ struct bnxt_re_dev *rdev;
+ int rc = 0;
+
+ re_work = container_of(work, struct bnxt_re_work, work);
+ rdev = re_work->rdev;
+
+ if (re_work->event != NETDEV_REGISTER &&
+ !test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
+ return;
+
+ switch (re_work->event) {
+ case NETDEV_REGISTER:
+ rc = bnxt_re_ib_reg(rdev);
+ if (rc)
+ dev_err(rdev_to_dev(rdev),
+ "Failed to register with IB: %#x", rc);
+ break;
+ case NETDEV_UP:
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
+ IB_EVENT_PORT_ACTIVE);
+ break;
+ case NETDEV_DOWN:
+ bnxt_re_dev_stop(rdev);
+ break;
+ case NETDEV_CHANGE:
+ if (!netif_carrier_ok(rdev->netdev))
+ bnxt_re_dev_stop(rdev);
+ else if (netif_carrier_ok(rdev->netdev))
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
+ IB_EVENT_PORT_ACTIVE);
+ break;
+ default:
+ break;
+ }
+ kfree(re_work);
+}
+
+static void bnxt_re_init_one(struct bnxt_re_dev *rdev)
+{
+ pci_dev_get(rdev->en_dev->pdev);
+}
+
+/*
+ * "Notifier chain callback can be invoked for the same chain from
+ * different CPUs at the same time".
+ *
+ * For cases when the netdev is already present, our call to the
+ * register_netdevice_notifier() will actually get the rtnl_lock()
+ * before sending NETDEV_REGISTER and (if up) NETDEV_UP
+ * events.
+ *
+ * But for cases when the netdev is not already present, the notifier
+ * chain is subjected to be invoked from different CPUs simultaneously.
+ *
+ * This is protected by the netdev_mutex.
+ */
+static int bnxt_re_netdev_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr);
+ struct bnxt_re_work *re_work;
+ struct bnxt_re_dev *rdev;
+ int rc = 0;
+ bool sch_work = false;
+
+ real_dev = rdma_vlan_dev_real_dev(netdev);
+ if (!real_dev)
+ real_dev = netdev;
+
+ rdev = bnxt_re_from_netdev(real_dev);
+ if (!rdev && event != NETDEV_REGISTER)
+ goto exit;
+ if (real_dev != netdev)
+ goto exit;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ if (rdev)
+ break;
+ rc = bnxt_re_dev_reg(&rdev, real_dev);
+ if (rc == -ENODEV)
+ break;
+ if (rc) {
+ pr_err("Failed to register with the device %s: %#x\n",
+ real_dev->name, rc);
+ break;
+ }
+ bnxt_re_init_one(rdev);
+ sch_work = true;
+ break;
+
+ case NETDEV_UNREGISTER:
+ bnxt_re_ib_unreg(rdev, false);
+ bnxt_re_remove_one(rdev);
+ bnxt_re_dev_unreg(rdev);
+ break;
+
+ default:
+ sch_work = true;
+ break;
+ }
+ if (sch_work) {
+ /* Allocate for the deferred task */
+ re_work = kzalloc(sizeof(*re_work), GFP_ATOMIC);
+ if (re_work) {
+ re_work->rdev = rdev;
+ re_work->event = event;
+ re_work->vlan_dev = (real_dev == netdev ?
+ NULL : netdev);
+ INIT_WORK(&re_work->work, bnxt_re_task);
+ queue_work(bnxt_re_wq, &re_work->work);
+ }
+ }
+
+exit:
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block bnxt_re_netdev_notifier = {
+ .notifier_call = bnxt_re_netdev_event
+};
+
+static int __init bnxt_re_mod_init(void)
+{
+ int rc = 0;
+
+ pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
+
+ bnxt_re_wq = create_singlethread_workqueue("bnxt_re");
+ if (!bnxt_re_wq)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&bnxt_re_dev_list);
+
+ rc = register_netdevice_notifier(&bnxt_re_netdev_notifier);
+ if (rc) {
+ pr_err("%s: Cannot register to netdevice_notifier",
+ ROCE_DRV_MODULE_NAME);
+ goto err_netdev;
+ }
+ return 0;
+
+err_netdev:
+ destroy_workqueue(bnxt_re_wq);
+
+ return rc;
+}
+
+static void __exit bnxt_re_mod_exit(void)
+{
+ unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
+ if (bnxt_re_wq)
+ destroy_workqueue(bnxt_re_wq);
+}
+
+module_init(bnxt_re_mod_init);
+module_exit(bnxt_re_mod_exit);
dev = to_iwch_dev(ibdev);
netdev = dev->rdev.port_info.lldevs[port-1];
- memset(props, 0, sizeof(struct ib_port_attr));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
- if (netdev->mtu >= 4096)
- props->active_mtu = IB_MTU_4096;
- else if (netdev->mtu >= 2048)
- props->active_mtu = IB_MTU_2048;
- else if (netdev->mtu >= 1024)
- props->active_mtu = IB_MTU_1024;
- else if (netdev->mtu >= 512)
- props->active_mtu = IB_MTU_512;
- else
- props->active_mtu = IB_MTU_256;
+ props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
if (!netif_carrier_ok(netdev))
props->state = IB_PORT_DOWN;
struct ib_port_attr attr;
int err;
- err = iwch_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
dev->ibdev.num_comp_vectors = 1;
- dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
+ dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;
dev->ibdev.query_device = iwch_query_device;
dev->ibdev.query_port = iwch_query_port;
dev->ibdev.query_pkey = iwch_query_pkey;
return -ENOSYS;
}
-static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
+void _c4iw_free_ucontext(struct kref *kref)
{
- struct c4iw_dev *rhp = to_c4iw_dev(context->device);
- struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
+ struct c4iw_ucontext *ucontext;
+ struct c4iw_dev *rhp;
struct c4iw_mm_entry *mm, *tmp;
- PDBG("%s context %p\n", __func__, context);
+ ucontext = container_of(kref, struct c4iw_ucontext, kref);
+ rhp = to_c4iw_dev(ucontext->ibucontext.device);
+
+ PDBG("%s ucontext %p\n", __func__, ucontext);
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
kfree(ucontext);
+}
+
+static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
+{
+ struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
+
+ PDBG("%s context %p\n", __func__, context);
+ c4iw_put_ucontext(ucontext);
return 0;
}
c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
INIT_LIST_HEAD(&context->mmaps);
spin_lock_init(&context->mmap_lock);
+ kref_init(&context->kref);
if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
if (!warned++)
dev = to_c4iw_dev(ibdev);
netdev = dev->rdev.lldi.ports[port-1];
-
- memset(props, 0, sizeof(struct ib_port_attr));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
- if (netdev->mtu >= 4096)
- props->active_mtu = IB_MTU_4096;
- else if (netdev->mtu >= 2048)
- props->active_mtu = IB_MTU_2048;
- else if (netdev->mtu >= 1024)
- props->active_mtu = IB_MTU_1024;
- else if (netdev->mtu >= 512)
- props->active_mtu = IB_MTU_512;
- else
- props->active_mtu = IB_MTU_256;
+ props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
if (!netif_carrier_ok(netdev))
props->state = IB_PORT_DOWN;
struct ib_port_attr attr;
int err;
- err = c4iw_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq;
- dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev);
+ dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
dev->ibdev.query_device = c4iw_query_device;
dev->ibdev.query_port = c4iw_query_port;
dev->ibdev.query_pkey = c4iw_query_pkey;
dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
dev->ibdev.get_port_immutable = c4iw_port_immutable;
dev->ibdev.get_dev_fw_str = get_dev_fw_str;
- dev->ibdev.drain_sq = c4iw_drain_sq;
- dev->ibdev.drain_rq = c4iw_drain_rq;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm)
/*
* Is the working set larger than the threshold?
*/
-static inline int wss_exceeds_threshold(void)
+static inline bool wss_exceeds_threshold(void)
{
return atomic_read(&wss.total_count) >= wss.threshold;
}
* @ss: the SGE state
* @data: the data to copy
* @length: the length of the data
+ * @release: boolean to release MR
* @copy_last: do a separate copy of the last 8 bytes
*/
void hfi1_copy_sge(
struct rvt_sge_state *ss,
void *data, u32 length,
- int release,
- int copy_last)
+ bool release,
+ bool copy_last)
{
struct rvt_sge *sge = &ss->sge;
- int in_last = 0;
int i;
- int cacheless_copy = 0;
+ bool in_last = false;
+ bool cacheless_copy = false;
if (sge_copy_mode == COPY_CACHELESS) {
cacheless_copy = length >= PAGE_SIZE;
if (length > 8) {
length -= 8;
} else {
- copy_last = 0;
- in_last = 1;
+ copy_last = false;
+ in_last = true;
}
}
again:
while (length) {
- u32 len = sge->length;
+ u32 len = rvt_get_sge_length(sge, length);
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
WARN_ON_ONCE(len == 0);
if (unlikely(in_last)) {
/* enforce byte transfer ordering */
} else {
memcpy(sge->vaddr, data, len);
}
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
+ rvt_update_sge(ss, len, release);
data += len;
length -= len;
}
if (copy_last) {
- copy_last = 0;
- in_last = 1;
+ copy_last = false;
+ in_last = true;
length = 8;
goto again;
}
}
-/**
- * hfi1_skip_sge - skip over SGE memory
- * @ss: the SGE state
- * @length: the number of bytes to skip
- */
-void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
-{
- struct rvt_sge *sge = &ss->sge;
-
- while (length) {
- u32 len = sge->length;
-
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- WARN_ON_ONCE(len == 0);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- length -= len;
- }
-}
-
/*
* Make sure the QP is ready and able to accept the given opcode.
*/
struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
- struct hfi1_ibport *ibp = &ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
opcode_handler packet_handler;
unsigned long flags;
hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
}
-void update_sge(struct rvt_sge_state *ss, u32 length)
-{
- struct rvt_sge *sge = &ss->sge;
-
- sge->vaddr += length;
- sge->length -= length;
- sge->sge_length -= length;
- if (sge->sge_length == 0) {
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- return;
- sge->n = 0;
- }
- sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
- }
-}
-
/*
* This is called with progress side lock held.
*/
len);
if (ret)
goto bail_txadd;
- update_sge(ss, len);
+ rvt_update_sge(ss, len, false);
length -= len;
}
return ret;
if (slen > len)
slen = len;
- update_sge(ss, slen);
+ rvt_update_sge(ss, slen, false);
seg_pio_copy_mid(pbuf, addr, slen);
len -= slen;
}
struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
u16 lid = ppd->lid;
+ /* props being zeroed by the caller, avoid zeroing it here */
props->lid = lid ? lid : 0;
props->lmc = ppd->lmc;
/* OPA logical states match IB logical states */
* external strings.
*/
static int init_cntr_names(const char *names_in,
- const int names_len,
+ const size_t names_len,
int num_extra_names,
int *num_cntrs,
const char ***cntr_names)
strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
ibdev->owner = THIS_MODULE;
ibdev->phys_port_cnt = dd->num_pports;
- ibdev->dma_device = &dd->pcidev->dev;
+ ibdev->dev.parent = &dd->pcidev->dev;
ibdev->modify_device = modify_device;
ibdev->alloc_hw_stats = alloc_hw_stats;
ibdev->get_hw_stats = get_hw_stats;
dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu;
dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
+ dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
/* completeion queue */
void hfi1_cnp_rcv(struct hfi1_packet *packet)
{
- struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_header *hdr = packet->hdr;
struct rvt_qp *qp = packet->qp;
*/
#include <linux/acpi.h>
#include <linux/of_platform.h>
+#include <linux/module.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
assert(port_num > 0);
port = port_num - 1;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = hr_dev->caps.max_mtu;
props->gid_tbl_len = hr_dev->caps.gid_table_len[port];
struct ib_port_attr attr;
int ret;
- ret = hns_roce_query_port(ib_dev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+ ret = ib_query_port(ib_dev, port_num, &attr);
if (ret)
return ret;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
ib_dev->owner = THIS_MODULE;
ib_dev->node_type = RDMA_NODE_IB_CA;
- ib_dev->dma_device = dev;
+ ib_dev->dev.parent = dev;
ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
struct i40iw_device *iwdev = to_iwdev(ibdev);
struct net_device *netdev = iwdev->netdev;
- memset(props, 0, sizeof(*props));
-
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
- if (netdev->mtu >= 4096)
- props->active_mtu = IB_MTU_4096;
- else if (netdev->mtu >= 2048)
- props->active_mtu = IB_MTU_2048;
- else if (netdev->mtu >= 1024)
- props->active_mtu = IB_MTU_1024;
- else if (netdev->mtu >= 512)
- props->active_mtu = IB_MTU_512;
- else
- props->active_mtu = IB_MTU_256;
+ props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
props->lid = 1;
if (netif_carrier_ok(iwdev->netdev))
struct ib_port_attr attr;
int err;
- err = i40iw_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
(1ull << IB_USER_VERBS_CMD_POST_SEND);
iwibdev->ibdev.phys_port_cnt = 1;
iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
- iwibdev->ibdev.dma_device = &pcidev->dev;
iwibdev->ibdev.dev.parent = &pcidev->dev;
iwibdev->ibdev.query_port = i40iw_query_port;
iwibdev->ibdev.modify_port = i40iw_modify_port;
}
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props, int netw_view)
+ struct ib_port_attr *props)
{
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
{
int err;
- memset(props, 0, sizeof *props);
+ /* props being zeroed by the caller, avoid zeroing it here */
err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
ib_link_query_port(ibdev, port, props, netw_view) :
- eth_link_query_port(ibdev, port, props, netw_view);
+ eth_link_query_port(ibdev, port, props);
return err;
}
mutex_lock(&mdev->cap_mask_mutex);
- err = mlx4_ib_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
int err;
- err = mlx4_ib_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
-
if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
} else {
if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
+ if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
}
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
return 0;
}
ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ?
1 : ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
- ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
+ ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev;
ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev;
ibdev->ib_dev.add_gid = mlx4_ib_add_gid;
ibdev->ib_dev.del_gid = mlx4_ib_del_gid;
#include <linux/in.h>
#include <linux/etherdevice.h>
#include <linux/mlx5/fs.h>
+#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
#define DRIVER_NAME "mlx5_ib"
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);
-static int deprecated_prof_sel = 2;
-module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
-MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
-
static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
enum ib_mtu ndev_ib_mtu;
u16 qkey_viol_cntr;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->port_cap_flags |= IB_PORT_CM_SUP;
props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
}
+int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
+ int index, enum ib_gid_type *gid_type)
+{
+ struct ib_gid_attr attr;
+ union ib_gid gid;
+ int ret;
+
+ ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
+ if (ret)
+ return ret;
+
+ if (!attr.ndev)
+ return -ENODEV;
+
+ dev_put(attr.ndev);
+
+ *gid_type = attr.gid_type;
+
+ return 0;
+}
+
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
- if (MLX5_CAP_ETH(mdev, csum_cap))
+ if (MLX5_CAP_ETH(mdev, csum_cap)) {
+ /* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
+ }
+
+ if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
+ props->raw_packet_caps |=
+ IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
}
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
- MLX5_CAP_ETH(dev->mdev, scatter_fcs))
+ MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
+ /* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
+ }
if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
}
- if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
- uhw->outlen)) {
- resp.mlx5_ib_support_multi_pkt_send_wqes =
- MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
- resp.response_length +=
- sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
- }
-
- if (field_avail(typeof(resp), reserved, uhw->outlen))
- resp.response_length += sizeof(resp.reserved);
-
if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
resp.cqe_comp_caps.max_num =
MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
resp.response_length += sizeof(resp.packet_pacing_caps);
}
+ if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
+ uhw->outlen)) {
+ resp.mlx5_ib_support_multi_pkt_send_wqes =
+ MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
+ resp.response_length +=
+ sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
+ }
+
+ if (field_avail(typeof(resp), reserved, uhw->outlen))
+ resp.response_length += sizeof(resp.reserved);
+
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
goto out;
}
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
if (err)
return err;
}
+static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
+ u32 value)
+{
+ struct mlx5_hca_vport_context ctx = {};
+ int err;
+
+ err = mlx5_query_hca_vport_context(dev->mdev, 0,
+ port_num, 0, &ctx);
+ if (err)
+ return err;
+
+ if (~ctx.cap_mask1_perm & mask) {
+ mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
+ mask, ctx.cap_mask1_perm);
+ return -EINVAL;
+ }
+
+ ctx.cap_mask1 = value;
+ ctx.cap_mask1_perm = mask;
+ err = mlx5_core_modify_hca_vport_context(dev->mdev, 0,
+ port_num, 0, &ctx);
+
+ return err;
+}
+
static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
struct ib_port_attr attr;
u32 tmp;
int err;
+ u32 change_mask;
+ u32 value;
+ bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
+ IB_LINK_LAYER_INFINIBAND);
+
+ if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
+ change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
+ value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
+ return set_port_caps_atomic(dev, port, change_mask, value);
+ }
mutex_lock(&dev->cap_mask_mutex);
- err = mlx5_ib_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
return err;
}
+static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
+{
+ mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n",
+ caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
+}
+
+static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
+ struct mlx5_ib_alloc_ucontext_req_v2 *req,
+ u32 *num_sys_pages)
+{
+ int uars_per_sys_page;
+ int bfregs_per_sys_page;
+ int ref_bfregs = req->total_num_bfregs;
+
+ if (req->total_num_bfregs == 0)
+ return -EINVAL;
+
+ BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE);
+ BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE);
+
+ if (req->total_num_bfregs > MLX5_MAX_BFREGS)
+ return -ENOMEM;
+
+ uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
+ bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
+ req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
+ *num_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
+
+ if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
+ return -EINVAL;
+
+ mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, alloated %d, using %d sys pages\n",
+ MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
+ lib_uar_4k ? "yes" : "no", ref_bfregs,
+ req->total_num_bfregs, *num_sys_pages);
+
+ return 0;
+}
+
+static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+{
+ struct mlx5_bfreg_info *bfregi;
+ int err;
+ int i;
+
+ bfregi = &context->bfregi;
+ for (i = 0; i < bfregi->num_sys_pages; i++) {
+ err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
+ if (err)
+ goto error;
+
+ mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]);
+ }
+ return 0;
+
+error:
+ for (--i; i >= 0; i--)
+ if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
+ mlx5_ib_warn(dev, "failed to free uar %d\n", i);
+
+ return err;
+}
+
+static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+{
+ struct mlx5_bfreg_info *bfregi;
+ int err;
+ int i;
+
+ bfregi = &context->bfregi;
+ for (i = 0; i < bfregi->num_sys_pages; i++) {
+ err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
+ if (err) {
+ mlx5_ib_warn(dev, "failed to free uar %d\n", i);
+ return err;
+ }
+ }
+ return 0;
+}
+
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx5_ib_alloc_ucontext_req_v2 req = {};
struct mlx5_ib_alloc_ucontext_resp resp = {};
struct mlx5_ib_ucontext *context;
- struct mlx5_uuar_info *uuari;
- struct mlx5_uar *uars;
- int gross_uuars;
- int num_uars;
+ struct mlx5_bfreg_info *bfregi;
int ver;
- int uuarn;
int err;
- int i;
size_t reqlen;
size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
max_cqe_version);
+ bool lib_uar_4k;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
if (req.flags)
return ERR_PTR(-EINVAL);
- if (req.total_num_uuars > MLX5_MAX_UUARS)
- return ERR_PTR(-ENOMEM);
-
- if (req.total_num_uuars == 0)
- return ERR_PTR(-EINVAL);
-
if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
return ERR_PTR(-EOPNOTSUPP);
- if (reqlen > sizeof(req) &&
- !ib_is_udata_cleared(udata, sizeof(req),
- reqlen - sizeof(req)))
- return ERR_PTR(-EOPNOTSUPP);
-
- req.total_num_uuars = ALIGN(req.total_num_uuars,
- MLX5_NON_FP_BF_REGS_PER_PAGE);
- if (req.num_low_latency_uuars > req.total_num_uuars - 1)
+ req.total_num_bfregs = ALIGN(req.total_num_bfregs,
+ MLX5_NON_FP_BFREGS_PER_UAR);
+ if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
return ERR_PTR(-EINVAL);
- num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
- gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
resp.cqe_version = min_t(__u8,
(__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
req.max_cqe_version);
+ resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
+ resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
resp.response_length = min(offsetof(typeof(resp), response_length) +
sizeof(resp.response_length), udata->outlen);
if (!context)
return ERR_PTR(-ENOMEM);
- uuari = &context->uuari;
- mutex_init(&uuari->lock);
- uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
- if (!uars) {
- err = -ENOMEM;
+ lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
+ bfregi = &context->bfregi;
+
+ /* updates req->total_num_bfregs */
+ err = calc_total_bfregs(dev, lib_uar_4k, &req, &bfregi->num_sys_pages);
+ if (err)
goto out_ctx;
- }
- uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
- sizeof(*uuari->bitmap),
+ mutex_init(&bfregi->lock);
+ bfregi->lib_uar_4k = lib_uar_4k;
+ bfregi->count = kcalloc(req.total_num_bfregs, sizeof(*bfregi->count),
GFP_KERNEL);
- if (!uuari->bitmap) {
+ if (!bfregi->count) {
err = -ENOMEM;
- goto out_uar_ctx;
- }
- /*
- * clear all fast path uuars
- */
- for (i = 0; i < gross_uuars; i++) {
- uuarn = i & 3;
- if (uuarn == 2 || uuarn == 3)
- set_bit(i, uuari->bitmap);
+ goto out_ctx;
}
- uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
- if (!uuari->count) {
+ bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
+ sizeof(*bfregi->sys_pages),
+ GFP_KERNEL);
+ if (!bfregi->sys_pages) {
err = -ENOMEM;
- goto out_bitmap;
+ goto out_count;
}
- for (i = 0; i < num_uars; i++) {
- err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
- if (err)
- goto out_count;
- }
+ err = allocate_uars(dev, context);
+ if (err)
+ goto out_sys_pages;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
+ context->upd_xlt_page = __get_free_page(GFP_KERNEL);
+ if (!context->upd_xlt_page) {
+ err = -ENOMEM;
+ goto out_uars;
+ }
+ mutex_init(&context->upd_xlt_page_mutex);
+
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
err = mlx5_core_alloc_transport_domain(dev->mdev,
&context->tdn);
if (err)
- goto out_uars;
+ goto out_page;
}
INIT_LIST_HEAD(&context->vma_private_list);
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- resp.tot_uuars = req.total_num_uuars;
+ resp.tot_bfregs = req.total_num_bfregs;
resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
if (field_avail(typeof(resp), cqe_version, udata->outlen))
resp.response_length += sizeof(resp.cmds_supp_uhw);
}
+ if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) {
+ if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
+ mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
+ resp.eth_min_inline++;
+ }
+ resp.response_length += sizeof(resp.eth_min_inline);
+ }
+
/*
* We don't want to expose information from the PCI bar that is located
* after 4096 bytes, so if the arch only supports larger pages, let's
* pretend we don't support reading the HCA's core clock. This is also
* forced by mmap function.
*/
- if (PAGE_SIZE <= 4096 &&
- field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
- resp.comp_mask |=
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
- resp.hca_core_clock_offset =
- offsetof(struct mlx5_init_seg, internal_timer_h) %
- PAGE_SIZE;
+ if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+ if (PAGE_SIZE <= 4096) {
+ resp.comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+ resp.hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
+ }
resp.response_length += sizeof(resp.hca_core_clock_offset) +
sizeof(resp.reserved2);
}
+ if (field_avail(typeof(resp), log_uar_size, udata->outlen))
+ resp.response_length += sizeof(resp.log_uar_size);
+
+ if (field_avail(typeof(resp), num_uars_per_page, udata->outlen))
+ resp.response_length += sizeof(resp.num_uars_per_page);
+
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto out_td;
- uuari->ver = ver;
- uuari->num_low_latency_uuars = req.num_low_latency_uuars;
- uuari->uars = uars;
- uuari->num_uars = num_uars;
+ bfregi->ver = ver;
+ bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
context->cqe_version = resp.cqe_version;
+ context->lib_caps = req.lib_caps;
+ print_lib_caps(dev, context->lib_caps);
return &context->ibucontext;
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+out_page:
+ free_page(context->upd_xlt_page);
+
out_uars:
- for (i--; i >= 0; i--)
- mlx5_cmd_free_uar(dev->mdev, uars[i].index);
-out_count:
- kfree(uuari->count);
+ deallocate_uars(dev, context);
-out_bitmap:
- kfree(uuari->bitmap);
+out_sys_pages:
+ kfree(bfregi->sys_pages);
-out_uar_ctx:
- kfree(uars);
+out_count:
+ kfree(bfregi->count);
out_ctx:
kfree(context);
+
return ERR_PTR(err);
}
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
- struct mlx5_uuar_info *uuari = &context->uuari;
- int i;
+ struct mlx5_bfreg_info *bfregi;
+ bfregi = &context->bfregi;
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
- for (i = 0; i < uuari->num_uars; i++) {
- if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
- mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
- }
-
- kfree(uuari->count);
- kfree(uuari->bitmap);
- kfree(uuari->uars);
+ free_page(context->upd_xlt_page);
+ deallocate_uars(dev, context);
+ kfree(bfregi->sys_pages);
+ kfree(bfregi->count);
kfree(context);
return 0;
}
-static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
+static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi,
+ int idx)
{
- return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
+ int fw_uars_per_page;
+
+ fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
+
+ return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) +
+ bfregi->sys_pages[idx] / fw_uars_per_page;
}
static int get_command(unsigned long offset)
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
{
- struct mlx5_uuar_info *uuari = &context->uuari;
+ struct mlx5_bfreg_info *bfregi = &context->bfregi;
int err;
unsigned long idx;
phys_addr_t pfn, pa;
pgprot_t prot;
+ int uars_per_page;
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k);
+ idx = get_index(vma->vm_pgoff);
+ if (idx % uars_per_page ||
+ idx * uars_per_page >= bfregi->num_sys_pages) {
+ mlx5_ib_warn(dev, "invalid uar index %lu\n", idx);
+ return -EINVAL;
+ }
switch (cmd) {
case MLX5_IB_MMAP_WC_PAGE:
return -EINVAL;
}
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
- return -EINVAL;
-
- idx = get_index(vma->vm_pgoff);
- if (idx >= uuari->num_uars)
- return -EINVAL;
-
- pfn = uar_index2pfn(dev, uuari->uars[idx].index);
+ pfn = uar_index2pfn(dev, bfregi, idx);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
vma->vm_page_prot = prot;
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
#define LAST_TUNNEL_FIELD tunnel_id
+#define LAST_FLOW_TAG_FIELD tag_id
/* Field is the last supported field */
#define FIELDS_NOT_SUPPORTED(filter, field)\
sizeof(filter.field))
static int parse_flow_attr(u32 *match_c, u32 *match_v,
- const union ib_flow_spec *ib_spec)
+ const union ib_flow_spec *ib_spec, u32 *tag_id)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
case IB_FLOW_SPEC_ETH:
if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dmac_47_16),
if (ib_spec->eth.mask.vlan_tag) {
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- vlan_tag, 1);
+ cvlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- vlan_tag, 1);
+ cvlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
break;
case IB_FLOW_SPEC_IPV4:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
break;
case IB_FLOW_SPEC_IPV6:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
case IB_FLOW_SPEC_TCP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
case IB_FLOW_SPEC_UDP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
case IB_FLOW_SPEC_VXLAN_TUNNEL:
if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
LAST_TUNNEL_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
ntohl(ib_spec->tunnel.mask.tunnel_id));
MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
ntohl(ib_spec->tunnel.val.tunnel_id));
break;
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
+ LAST_FLOW_TAG_FIELD))
+ return -EOPNOTSUPP;
+ if (ib_spec->flow_tag.tag_id >= BIT(24))
+ return -EINVAL;
+
+ *tag_id = ib_spec->flow_tag.tag_id;
+ break;
default:
return -EINVAL;
}
struct mlx5_flow_spec *spec;
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
+ u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
int err = 0;
if (!is_valid_attr(flow_attr))
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(spec->match_criteria,
- spec->match_value, ib_flow);
+ spec->match_value, ib_flow, &flow_tag);
if (err < 0)
goto free;
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
- flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+
+ if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
+ flow_tag, flow_attr->type);
+ err = -EINVAL;
+ goto free;
+ }
+ flow_act.flow_tag = flow_tag;
handler->rule = mlx5_add_flow_rules(ft, spec,
&flow_act,
dst, 1);
ibdev->ib_active = false;
}
+static int set_has_smi_cap(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_hca_vport_context vport_ctx;
+ int err;
+ int port;
+
+ for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
+ dev->mdev->port_caps[port - 1].has_smi = false;
+ if (MLX5_CAP_GEN(dev->mdev, port_type) ==
+ MLX5_CAP_PORT_TYPE_IB) {
+ if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
+ err = mlx5_query_hca_vport_context(dev->mdev, 0,
+ port, 0,
+ &vport_ctx);
+ if (err) {
+ mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
+ port, err);
+ return err;
+ }
+ dev->mdev->port_caps[port - 1].has_smi =
+ vport_ctx.has_smi;
+ } else {
+ dev->mdev->port_caps[port - 1].has_smi = true;
+ }
+ }
+ }
+ return 0;
+}
+
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
int port;
if (!dprops)
goto out;
+ err = set_has_smi_cap(dev);
+ if (err)
+ goto out;
+
err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
if (err) {
mlx5_ib_warn(dev, "query_device failed %d\n", err);
}
for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
+ memset(pprops, 0, sizeof(*pprops));
err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
if (err) {
mlx5_ib_warn(dev, "query_port %d failed %d\n",
if (ll == IB_LINK_LAYER_INFINIBAND)
return RDMA_CORE_PORT_IBA_IB;
+ ret = RDMA_CORE_PORT_RAW_PACKET;
+
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
- return 0;
+ return ret;
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
- return 0;
+ return ret;
if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
ret |= RDMA_CORE_PORT_IBA_ROCE;
enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
int err;
- err = mlx5_ib_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = get_core_cap_flags(ibdev);
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
mlx5_nic_vport_disable_roce(dev->mdev);
}
+struct mlx5_ib_q_counter {
+ const char *name;
+ size_t offset;
+};
+
+#define INIT_Q_COUNTER(_name) \
+ { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
+
+static const struct mlx5_ib_q_counter basic_q_cnts[] = {
+ INIT_Q_COUNTER(rx_write_requests),
+ INIT_Q_COUNTER(rx_read_requests),
+ INIT_Q_COUNTER(rx_atomic_requests),
+ INIT_Q_COUNTER(out_of_buffer),
+};
+
+static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = {
+ INIT_Q_COUNTER(out_of_sequence),
+};
+
+static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
+ INIT_Q_COUNTER(duplicate_request),
+ INIT_Q_COUNTER(rnr_nak_retry_err),
+ INIT_Q_COUNTER(packet_seq_err),
+ INIT_Q_COUNTER(implied_nak_seq_err),
+ INIT_Q_COUNTER(local_ack_timeout_err),
+};
+
static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
{
unsigned int i;
- for (i = 0; i < dev->num_ports; i++)
+ for (i = 0; i < dev->num_ports; i++) {
mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnt_id);
+ dev->port[i].q_cnts.set_id);
+ kfree(dev->port[i].q_cnts.names);
+ kfree(dev->port[i].q_cnts.offsets);
+ }
+}
+
+static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
+ const char ***names,
+ size_t **offsets,
+ u32 *num)
+{
+ u32 num_counters;
+
+ num_counters = ARRAY_SIZE(basic_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
+ num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
+ num_counters += ARRAY_SIZE(retrans_q_cnts);
+
+ *names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL);
+ if (!*names)
+ return -ENOMEM;
+
+ *offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL);
+ if (!*offsets)
+ goto err_names;
+
+ *num = num_counters;
+
+ return 0;
+
+err_names:
+ kfree(*names);
+ return -ENOMEM;
+}
+
+static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
+ const char **names,
+ size_t *offsets)
+{
+ int i;
+ int j = 0;
+
+ for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+ names[j] = basic_q_cnts[i].name;
+ offsets[j] = basic_q_cnts[i].offset;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
+ for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+ names[j] = out_of_seq_q_cnts[i].name;
+ offsets[j] = out_of_seq_q_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+ names[j] = retrans_q_cnts[i].name;
+ offsets[j] = retrans_q_cnts[i].offset;
+ }
+ }
}
static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
int ret;
for (i = 0; i < dev->num_ports; i++) {
+ struct mlx5_ib_port *port = &dev->port[i];
+
ret = mlx5_core_alloc_q_counter(dev->mdev,
- &dev->port[i].q_cnt_id);
+ &port->q_cnts.set_id);
if (ret) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d, err %d\n",
i + 1, ret);
goto dealloc_counters;
}
+
+ ret = __mlx5_ib_alloc_q_counters(dev,
+ &port->q_cnts.names,
+ &port->q_cnts.offsets,
+ &port->q_cnts.num_counters);
+ if (ret)
+ goto dealloc_counters;
+
+ mlx5_ib_fill_q_counters(dev, port->q_cnts.names,
+ port->q_cnts.offsets);
}
return 0;
dealloc_counters:
while (--i >= 0)
mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnt_id);
+ dev->port[i].q_cnts.set_id);
return ret;
}
-static const char * const names[] = {
- "rx_write_requests",
- "rx_read_requests",
- "rx_atomic_requests",
- "out_of_buffer",
- "out_of_sequence",
- "duplicate_request",
- "rnr_nak_retry_err",
- "packet_seq_err",
- "implied_nak_seq_err",
- "local_ack_timeout_err",
-};
-
-static const size_t stats_offsets[] = {
- MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
- MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
- MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
- MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
- MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
- MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
- MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
- MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
- MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
- MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
-};
-
static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
u8 port_num)
{
- BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_port *port = &dev->port[port_num - 1];
/* We support only per port stats */
if (port_num == 0)
return NULL;
- return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
+ return rdma_alloc_hw_stats_struct(port->q_cnts.names,
+ port->q_cnts.num_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
- u8 port, int index)
+ u8 port_num, int index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_port *port = &dev->port[port_num - 1];
int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
void *out;
__be32 val;
int ret;
int i;
- if (!port || !stats)
+ if (!stats)
return -ENOSYS;
out = mlx5_vzalloc(outlen);
return -ENOMEM;
ret = mlx5_core_query_q_counter(dev->mdev,
- dev->port[port - 1].q_cnt_id, 0,
+ port->q_cnts.set_id, 0,
out, outlen);
if (ret)
goto free;
- for (i = 0; i < ARRAY_SIZE(names); i++) {
- val = *(__be32 *)(out + stats_offsets[i]);
+ for (i = 0; i < port->q_cnts.num_counters; i++) {
+ val = *(__be32 *)(out + port->q_cnts.offsets[i]);
stats->value[i] = (u64)be32_to_cpu(val);
}
+
free:
kvfree(out);
- return ARRAY_SIZE(names);
+ return port->q_cnts.num_counters;
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
- MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
-
if (!mlx5_lag_is_active(mdev))
name = "mlx5_%d";
else
dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors =
dev->mdev->priv.eq_table.num_comp_vectors;
- dev->ib_dev.dma_device = &mdev->pdev->dev;
+ dev->ib_dev.dev.parent = &mdev->pdev->dev;
dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
}
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
- MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
}
if (err)
goto err_rsrc;
- err = mlx5_ib_alloc_q_counters(dev);
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
+ err = mlx5_ib_alloc_q_counters(dev);
+ if (err)
+ goto err_odp;
+ }
+
+ dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
+ if (!dev->mdev->priv.uar)
+ goto err_q_cnt;
+
+ err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
+ if (err)
+ goto err_uar_page;
+
+ err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
if (err)
- goto err_odp;
+ goto err_bfreg;
err = ib_register_device(&dev->ib_dev, NULL);
if (err)
- goto err_q_cnt;
+ goto err_fp_bfreg;
err = create_umr_res(dev);
if (err)
err_dev:
ib_unregister_device(&dev->ib_dev);
+err_fp_bfreg:
+ mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
+
+err_bfreg:
+ mlx5_free_bfreg(dev->mdev, &dev->bfreg);
+
+err_uar_page:
+ mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
+
err_q_cnt:
- mlx5_ib_dealloc_q_counters(dev);
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ mlx5_ib_dealloc_q_counters(dev);
err_odp:
mlx5_ib_odp_remove_one(dev);
mlx5_remove_netdev_notifier(dev);
ib_unregister_device(&dev->ib_dev);
- mlx5_ib_dealloc_q_counters(dev);
+ mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
+ mlx5_free_bfreg(dev->mdev, &dev->bfreg);
+ mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ mlx5_ib_dealloc_q_counters(dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
.add = mlx5_ib_add,
.remove = mlx5_ib_remove,
.event = mlx5_ib_event,
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ .pfault = mlx5_ib_pfault,
+#endif
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
{
int err;
- if (deprecated_prof_sel != 2)
- pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
-
- err = mlx5_ib_odp_init();
- if (err)
- return err;
+ mlx5_ib_odp_init();
err = mlx5_register_interface(&mlx5_ib_interface);
- if (err)
- goto clean_odp;
-
- return err;
-clean_odp:
- mlx5_ib_odp_cleanup();
return err;
}
static void __exit mlx5_ib_cleanup(void)
{
mlx5_unregister_interface(&mlx5_ib_interface);
- mlx5_ib_odp_cleanup();
}
module_init(mlx5_ib_init);
};
#define MLX5_UMR_ALIGN 2048
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static __be64 mlx5_ib_update_mtt_emergency_buffer[
- MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
- __aligned(MLX5_UMR_ALIGN);
-static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
-#endif
static int clean_mr(struct mlx5_ib_mr *mr);
+static int use_umr(struct mlx5_ib_dev *dev, int order);
+static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
return;
}
+ mr->mmkey.type = MLX5_MKEY_MR;
spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
key = dev->mdev->priv.mkey_key++;
spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
if (err)
pr_err("Error inserting to mkey tree. 0x%x\n", -err);
write_unlock_irqrestore(&table->lock, flags);
+
+ if (!completion_done(&ent->compl))
+ complete(&ent->compl);
}
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
struct mlx5_cache_ent *ent = &cache->ent[c];
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
- int npages = 1 << ent->order;
void *mkc;
u32 *in;
int err = 0;
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, access_mode, ent->access_mode);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
- MLX5_SET(mkc, mkc, log_page_size, 12);
+ MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
+ MLX5_SET(mkc, mkc, log_page_size, ent->page);
spin_lock_irq(&ent->lock);
ent->pending++;
__cache_work_func(ent);
}
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent;
+ struct mlx5_ib_mr *mr;
+ int err;
+
+ if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
+ mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
+ return NULL;
+ }
+
+ ent = &cache->ent[entry];
+ while (1) {
+ spin_lock_irq(&ent->lock);
+ if (list_empty(&ent->head)) {
+ spin_unlock_irq(&ent->lock);
+
+ err = add_keys(dev, entry, 1);
+ if (err && err != -EAGAIN)
+ return ERR_PTR(err);
+
+ wait_for_completion(&ent->compl);
+ } else {
+ mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
+ list);
+ list_del(&mr->list);
+ ent->cur--;
+ spin_unlock_irq(&ent->lock);
+ if (ent->cur < ent->limit)
+ queue_work(cache->wq, &ent->work);
+ return mr;
+ }
+ }
+}
+
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
struct mlx5_mr_cache *cache = &dev->cache;
int i;
c = order2idx(dev, order);
- if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
+ if (c < 0 || c > MAX_UMR_CACHE_ENTRY) {
mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
return NULL;
}
- for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
+ for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) {
ent = &cache->ent[i];
mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
return mr;
}
-static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
return;
}
+
+ if (unreg_umr(dev, mr))
+ return;
+
ent = &cache->ent[c];
spin_lock_irq(&ent->lock);
list_add_tail(&mr->list, &ent->head);
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
- int limit;
int err;
int i;
setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- INIT_LIST_HEAD(&cache->ent[i].head);
- spin_lock_init(&cache->ent[i].lock);
-
ent = &cache->ent[i];
INIT_LIST_HEAD(&ent->head);
spin_lock_init(&ent->lock);
ent->order = i + 2;
ent->dev = dev;
+ ent->limit = 0;
- if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
- (mlx5_core_is_pf(dev->mdev)))
- limit = dev->mdev->profile->mr_cache[i].limit;
- else
- limit = 0;
-
+ init_completion(&ent->compl);
INIT_WORK(&ent->work, cache_work_func);
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
- ent->limit = limit;
queue_work(cache->wq, &ent->work);
+
+ if (i > MAX_UMR_CACHE_ENTRY) {
+ mlx5_odp_init_mr_cache_entry(ent);
+ continue;
+ }
+
+ if (!use_umr(dev, ent->order))
+ continue;
+
+ ent->page = PAGE_SHIFT;
+ ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
+ MLX5_IB_UMR_OCTOWORD;
+ ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+ if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+ mlx5_core_is_pf(dev->mdev))
+ ent->limit = dev->mdev->profile->mr_cache[i].limit;
+ else
+ ent->limit = 0;
}
err = mlx5_mr_cache_debugfs_init(dev);
goto err_in;
kfree(in);
+ mr->mmkey.type = MLX5_MKEY_MR;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
mr->umem = NULL;
return (npages + 1) / 2;
}
-static int use_umr(int order)
+static int use_umr(struct mlx5_ib_dev *dev, int order)
{
+ if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+ return order <= MAX_UMR_CACHE_ENTRY + 2;
return order <= MLX5_MAX_UMR_SHIFT;
}
-static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int npages, int page_shift, int *size,
- __be64 **mr_pas, dma_addr_t *dma)
-{
- __be64 *pas;
- struct device *ddev = dev->ib_dev.dev.parent;
-
- /*
- * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
- * To avoid copying garbage after the pas array, we allocate
- * a little more.
- */
- *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
- *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
- if (!(*mr_pas))
- return -ENOMEM;
-
- pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
- mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
- /* Clear padding after the actual pages. */
- memset(pas + npages, 0, *size - npages * sizeof(u64));
-
- *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, *dma)) {
- kfree(*mr_pas);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
- struct ib_sge *sg, u64 dma, int n, u32 key,
- int page_shift)
-{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- sg->addr = dma;
- sg->length = ALIGN(sizeof(u64) * n, 64);
- sg->lkey = dev->umrc.pd->local_dma_lkey;
-
- wr->next = NULL;
- wr->sg_list = sg;
- if (n)
- wr->num_sge = 1;
- else
- wr->num_sge = 0;
-
- wr->opcode = MLX5_IB_WR_UMR;
-
- umrwr->npages = n;
- umrwr->page_shift = page_shift;
- umrwr->mkey = key;
-}
-
-static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
- struct ib_sge *sg, u64 dma, int n, u32 key,
- int page_shift, u64 virt_addr, u64 len,
- int access_flags)
-{
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);
-
- wr->send_flags = 0;
-
- umrwr->target.virt_addr = virt_addr;
- umrwr->length = len;
- umrwr->access_flags = access_flags;
- umrwr->pd = pd;
-}
-
-static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
- struct ib_send_wr *wr, u32 key)
-{
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
- wr->opcode = MLX5_IB_WR_UMR;
- umrwr->mkey = key;
-}
-
static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
int access_flags, struct ib_umem **umem,
int *npages, int *page_shift, int *ncont,
init_completion(&context->done);
}
+static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
+ struct mlx5_umr_wr *umrwr)
+{
+ struct umr_common *umrc = &dev->umrc;
+ struct ib_send_wr *bad;
+ int err;
+ struct mlx5_ib_umr_context umr_context;
+
+ mlx5_ib_init_umr_context(&umr_context);
+ umrwr->wr.wr_cqe = &umr_context.cqe;
+
+ down(&umrc->sem);
+ err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
+ if (err) {
+ mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
+ } else {
+ wait_for_completion(&umr_context.done);
+ if (umr_context.status != IB_WC_SUCCESS) {
+ mlx5_ib_warn(dev, "reg umr failed (%u)\n",
+ umr_context.status);
+ err = -EFAULT;
+ }
+ }
+ up(&umrc->sem);
+ return err;
+}
+
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
u64 virt_addr, u64 len, int npages,
int page_shift, int order, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct device *ddev = dev->ib_dev.dev.parent;
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
- struct mlx5_umr_wr umrwr = {};
- struct ib_send_wr *bad;
struct mlx5_ib_mr *mr;
- struct ib_sge sg;
- int size;
- __be64 *mr_pas;
- dma_addr_t dma;
int err = 0;
int i;
if (!mr)
return ERR_PTR(-EAGAIN);
- err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
- &dma);
- if (err)
- goto free_mr;
-
- mlx5_ib_init_umr_context(&umr_context);
+ mr->ibmr.pd = pd;
+ mr->umem = umem;
+ mr->access_flags = access_flags;
+ mr->desc_size = sizeof(struct mlx5_mtt);
+ mr->mmkey.iova = virt_addr;
+ mr->mmkey.size = len;
+ mr->mmkey.pd = to_mpd(pd)->pdn;
- umrwr.wr.wr_cqe = &umr_context.cqe;
- prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
- page_shift, virt_addr, len, access_flags);
+ err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
+ MLX5_IB_UPD_XLT_ENABLE);
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
if (err) {
- mlx5_ib_warn(dev, "post send failed, err %d\n", err);
- goto unmap_dma;
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed\n");
- err = -EFAULT;
- }
+ mlx5_mr_cache_free(dev, mr);
+ return ERR_PTR(err);
}
- mr->mmkey.iova = virt_addr;
- mr->mmkey.size = len;
- mr->mmkey.pd = to_mpd(pd)->pdn;
-
mr->live = 1;
-unmap_dma:
- up(&umrc->sem);
- dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+ return mr;
+}
+
+static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
+ void *xlt, int page_shift, size_t size,
+ int flags)
+{
+ struct mlx5_ib_dev *dev = mr->dev;
+ struct ib_umem *umem = mr->umem;
+ if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
+ mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
+ return npages;
+ }
- kfree(mr_pas);
+ npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
-free_mr:
- if (err) {
- free_cached_mr(dev, mr);
- return ERR_PTR(err);
+ if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
+ __mlx5_ib_populate_pas(dev, umem, page_shift,
+ idx, npages, xlt,
+ MLX5_IB_MTT_PRESENT);
+ /* Clear padding after the pages
+ * brought from the umem.
+ */
+ memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
+ size - npages * sizeof(struct mlx5_mtt));
}
- return mr;
+ return npages;
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
- int zap)
+#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
+ MLX5_UMR_MTT_ALIGNMENT)
+#define MLX5_SPARE_UMR_CHUNK 0x10000
+
+int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
+ int page_shift, int flags)
{
struct mlx5_ib_dev *dev = mr->dev;
- struct device *ddev = dev->ib_dev.dma_device;
+ struct device *ddev = dev->ib_dev.dev.parent;
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
- struct ib_umem *umem = mr->umem;
+ struct mlx5_ib_ucontext *uctx = NULL;
int size;
- __be64 *pas;
+ void *xlt;
dma_addr_t dma;
- struct ib_send_wr *bad;
struct mlx5_umr_wr wr;
struct ib_sge sg;
int err = 0;
- const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
- const int page_index_mask = page_index_alignment - 1;
+ int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
+ ? sizeof(struct mlx5_klm)
+ : sizeof(struct mlx5_mtt);
+ const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
+ const int page_mask = page_align - 1;
size_t pages_mapped = 0;
size_t pages_to_map = 0;
size_t pages_iter = 0;
- int use_emergency_buf = 0;
+ gfp_t gfp;
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
- * so we need to align the offset and length accordingly */
- if (start_page_index & page_index_mask) {
- npages += start_page_index & page_index_mask;
- start_page_index &= ~page_index_mask;
+ * so we need to align the offset and length accordingly
+ */
+ if (idx & page_mask) {
+ npages += idx & page_mask;
+ idx &= ~page_mask;
}
- pages_to_map = ALIGN(npages, page_index_alignment);
+ gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
+ gfp |= __GFP_ZERO | __GFP_NOWARN;
- if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
- return -EINVAL;
+ pages_to_map = ALIGN(npages, page_align);
+ size = desc_size * pages_to_map;
+ size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
- size = sizeof(u64) * pages_to_map;
- size = min_t(int, PAGE_SIZE, size);
- /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
- * code, when we are called from an invalidation. The pas buffer must
- * be 2k-aligned for Connect-IB. */
- pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
- if (!pas) {
- mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
- pas = mlx5_ib_update_mtt_emergency_buffer;
- size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
- use_emergency_buf = 1;
- mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
- memset(pas, 0, size);
+ xlt = (void *)__get_free_pages(gfp, get_order(size));
+ if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
+ mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n",
+ size, get_order(size), MLX5_SPARE_UMR_CHUNK);
+
+ size = MLX5_SPARE_UMR_CHUNK;
+ xlt = (void *)__get_free_pages(gfp, get_order(size));
+ }
+
+ if (!xlt) {
+ uctx = to_mucontext(mr->ibmr.uobject->context);
+ mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
+ size = PAGE_SIZE;
+ xlt = (void *)uctx->upd_xlt_page;
+ mutex_lock(&uctx->upd_xlt_page_mutex);
+ memset(xlt, 0, size);
}
- pages_iter = size / sizeof(u64);
- dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+ pages_iter = size / desc_size;
+ dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
if (dma_mapping_error(ddev, dma)) {
- mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
+ mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
err = -ENOMEM;
- goto free_pas;
+ goto free_xlt;
}
+ sg.addr = dma;
+ sg.lkey = dev->umrc.pd->local_dma_lkey;
+
+ memset(&wr, 0, sizeof(wr));
+ wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
+ if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
+ wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+ wr.wr.sg_list = &sg;
+ wr.wr.num_sge = 1;
+ wr.wr.opcode = MLX5_IB_WR_UMR;
+
+ wr.pd = mr->ibmr.pd;
+ wr.mkey = mr->mmkey.key;
+ wr.length = mr->mmkey.size;
+ wr.virt_addr = mr->mmkey.iova;
+ wr.access_flags = mr->access_flags;
+ wr.page_shift = page_shift;
+
for (pages_mapped = 0;
pages_mapped < pages_to_map && !err;
- pages_mapped += pages_iter, start_page_index += pages_iter) {
+ pages_mapped += pages_iter, idx += pages_iter) {
dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
-
- npages = min_t(size_t,
- pages_iter,
- ib_umem_num_pages(umem) - start_page_index);
-
- if (!zap) {
- __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
- start_page_index, npages, pas,
- MLX5_IB_MTT_PRESENT);
- /* Clear padding after the pages brought from the
- * umem. */
- memset(pas + npages, 0, size - npages * sizeof(u64));
- }
+ npages = populate_xlt(mr, idx, pages_iter, xlt,
+ page_shift, size, flags);
dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
- mlx5_ib_init_umr_context(&umr_context);
-
- memset(&wr, 0, sizeof(wr));
- wr.wr.wr_cqe = &umr_context.cqe;
-
- sg.addr = dma;
- sg.length = ALIGN(npages * sizeof(u64),
- MLX5_UMR_MTT_ALIGNMENT);
- sg.lkey = dev->umrc.pd->local_dma_lkey;
+ sg.length = ALIGN(npages * desc_size,
+ MLX5_UMR_MTT_ALIGNMENT);
+
+ if (pages_mapped + pages_iter >= pages_to_map) {
+ if (flags & MLX5_IB_UPD_XLT_ENABLE)
+ wr.wr.send_flags |=
+ MLX5_IB_SEND_UMR_ENABLE_MR |
+ MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
+ MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ if (flags & MLX5_IB_UPD_XLT_PD ||
+ flags & MLX5_IB_UPD_XLT_ACCESS)
+ wr.wr.send_flags |=
+ MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
+ if (flags & MLX5_IB_UPD_XLT_ADDR)
+ wr.wr.send_flags |=
+ MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ }
- wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
- MLX5_IB_SEND_UMR_UPDATE_MTT;
- wr.wr.sg_list = &sg;
- wr.wr.num_sge = 1;
- wr.wr.opcode = MLX5_IB_WR_UMR;
- wr.npages = sg.length / sizeof(u64);
- wr.page_shift = PAGE_SHIFT;
- wr.mkey = mr->mmkey.key;
- wr.target.offset = start_page_index;
+ wr.offset = idx * desc_size;
+ wr.xlt_size = sg.length;
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &wr.wr, &bad);
- if (err) {
- mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_err(dev, "UMR completion failed, code %d\n",
- umr_context.status);
- err = -EFAULT;
- }
- }
- up(&umrc->sem);
+ err = mlx5_ib_post_send_wait(dev, &wr);
}
dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
-free_pas:
- if (!use_emergency_buf)
- free_page((unsigned long)pas);
+free_xlt:
+ if (uctx)
+ mutex_unlock(&uctx->upd_xlt_page_mutex);
else
- mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+ free_pages((unsigned long)xlt, get_order(size));
return err;
}
-#endif
/*
* If ibmr is NULL it will be allocated by reg_create.
goto err_1;
}
pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
- mlx5_ib_populate_pas(dev, umem, page_shift, pas,
- pg_cap ? MLX5_IB_MTT_PRESENT : 0);
+ if (!(access_flags & IB_ACCESS_ON_DEMAND))
+ mlx5_ib_populate_pas(dev, umem, page_shift, pas,
+ pg_cap ? MLX5_IB_MTT_PRESENT : 0);
/* The pg_access bit allows setting the access flags
* in the page list submitted with the command. */
mlx5_ib_warn(dev, "create mkey failed\n");
goto err_2;
}
+ mr->mmkey.type = MLX5_MKEY_MR;
+ mr->desc_size = sizeof(struct mlx5_mtt);
mr->umem = umem;
mr->dev = dev;
mr->live = 1;
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (!start && length == U64_MAX) {
+ if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
+ !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return ERR_PTR(-EINVAL);
+
+ mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+ return &mr->ibmr;
+ }
+#endif
+
err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
&page_shift, &ncont, &order);
if (err < 0)
return ERR_PTR(err);
- if (use_umr(order)) {
+ if (use_umr(dev, order)) {
mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
order, access_flags);
if (PTR_ERR(mr) == -EAGAIN) {
mlx5_ib_dbg(dev, "cache empty for order %d", order);
mr = NULL;
}
- } else if (access_flags & IB_ACCESS_ON_DEMAND) {
+ } else if (access_flags & IB_ACCESS_ON_DEMAND &&
+ !MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
err = -EINVAL;
pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
goto error;
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct mlx5_core_dev *mdev = dev->mdev;
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
struct mlx5_umr_wr umrwr = {};
- struct ib_send_wr *bad;
- int err;
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
- mlx5_ib_init_umr_context(&umr_context);
+ umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
+ MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+ umrwr.wr.opcode = MLX5_IB_WR_UMR;
+ umrwr.mkey = mr->mmkey.key;
- umrwr.wr.wr_cqe = &umr_context.cqe;
- prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);
-
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
- if (err) {
- up(&umrc->sem);
- mlx5_ib_dbg(dev, "err %d\n", err);
- goto error;
- } else {
- wait_for_completion(&umr_context.done);
- up(&umrc->sem);
- }
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "unreg umr failed\n");
- err = -EFAULT;
- goto error;
- }
- return 0;
-
-error:
- return err;
+ return mlx5_ib_post_send_wait(dev, &umrwr);
}
-static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
- u64 length, int npages, int page_shift, int order,
+static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
int access_flags, int flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct device *ddev = dev->ib_dev.dev.parent;
- struct mlx5_ib_umr_context umr_context;
- struct ib_send_wr *bad;
struct mlx5_umr_wr umrwr = {};
- struct ib_sge sg;
- struct umr_common *umrc = &dev->umrc;
- dma_addr_t dma = 0;
- __be64 *mr_pas = NULL;
- int size;
int err;
- mlx5_ib_init_umr_context(&umr_context);
-
- umrwr.wr.wr_cqe = &umr_context.cqe;
umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
- if (flags & IB_MR_REREG_TRANS) {
- err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
- &mr_pas, &dma);
- if (err)
- return err;
-
- umrwr.target.virt_addr = virt_addr;
- umrwr.length = length;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
- }
-
- prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
- page_shift);
+ umrwr.wr.opcode = MLX5_IB_WR_UMR;
+ umrwr.mkey = mr->mmkey.key;
- if (flags & IB_MR_REREG_PD) {
+ if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
umrwr.pd = pd;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
- }
-
- if (flags & IB_MR_REREG_ACCESS) {
umrwr.access_flags = access_flags;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
+ umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
}
- /* post send request to UMR QP */
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
+ err = mlx5_ib_post_send_wait(dev, &umrwr);
- if (err) {
- mlx5_ib_warn(dev, "post send failed, err %d\n", err);
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed (%u)\n",
- umr_context.status);
- err = -EFAULT;
- }
- }
-
- up(&umrc->sem);
- if (flags & IB_MR_REREG_TRANS) {
- dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
- kfree(mr_pas);
- }
return err;
}
u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
int page_shift = 0;
+ int upd_flags = 0;
int npages = 0;
int ncont = 0;
int order = 0;
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
+ atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
+
if (flags != IB_MR_REREG_PD) {
/*
* Replace umem. This needs to be done whether or not UMR is
err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
&npages, &page_shift, &ncont, &order);
if (err < 0) {
- mr->umem = NULL;
+ clean_mr(mr);
return err;
}
}
/*
* Send a UMR WQE
*/
- err = rereg_umr(pd, mr, addr, len, npages, page_shift,
- order, access_flags, flags);
+ mr->ibmr.pd = pd;
+ mr->access_flags = access_flags;
+ mr->mmkey.iova = addr;
+ mr->mmkey.size = len;
+ mr->mmkey.pd = to_mpd(pd)->pdn;
+
+ if (flags & IB_MR_REREG_TRANS) {
+ upd_flags = MLX5_IB_UPD_XLT_ADDR;
+ if (flags & IB_MR_REREG_PD)
+ upd_flags |= MLX5_IB_UPD_XLT_PD;
+ if (flags & IB_MR_REREG_ACCESS)
+ upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
+ err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
+ upd_flags);
+ } else {
+ err = rereg_umr(pd, mr, access_flags, flags);
+ }
+
if (err) {
mlx5_ib_warn(dev, "Failed to rereg UMR\n");
+ ib_umem_release(mr->umem);
+ clean_mr(mr);
return err;
}
}
- if (flags & IB_MR_REREG_PD) {
- ib_mr->pd = pd;
- mr->mmkey.pd = to_mpd(pd)->pdn;
- }
+ set_mr_fileds(dev, mr, npages, len, access_flags);
- if (flags & IB_MR_REREG_ACCESS)
- mr->access_flags = access_flags;
-
- if (flags & IB_MR_REREG_TRANS) {
- atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
- set_mr_fileds(dev, mr, npages, len, access_flags);
- mr->mmkey.iova = addr;
- mr->mmkey.size = len;
- }
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
update_odp_mr(mr);
#endif
-
return 0;
}
mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
- mr->desc_map = dma_map_single(device->dma_device, mr->descs,
+ mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
size, DMA_TO_DEVICE);
- if (dma_mapping_error(device->dma_device, mr->desc_map)) {
+ if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
ret = -ENOMEM;
goto err;
}
struct ib_device *device = mr->ibmr.device;
int size = mr->max_descs * mr->desc_size;
- dma_unmap_single(device->dma_device, mr->desc_map,
+ dma_unmap_single(device->dev.parent, mr->desc_map,
size, DMA_TO_DEVICE);
kfree(mr->descs_alloc);
mr->descs = NULL;
return err;
}
} else {
- err = unreg_umr(dev, mr);
- if (err) {
- mlx5_ib_warn(dev, "failed unregister\n");
- return err;
- }
- free_cached_mr(dev, mr);
+ mlx5_mr_cache_free(dev, mr);
}
if (!umred)
/* Wait for all running page-fault handlers to finish. */
synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
- mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
- ib_umem_end(umem));
+ if (umem->odp_data->page_list)
+ mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+ ib_umem_end(umem));
+ else
+ mlx5_ib_free_implicit_mr(mr);
/*
* We kill the umem before the MR for ODP,
* so that there will not be any invalidations in
mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
err = mlx5_alloc_priv_descs(pd->device, mr,
- ndescs, sizeof(u64));
+ ndescs, sizeof(struct mlx5_mtt));
if (err)
goto err_free_in;
- mr->desc_size = sizeof(u64);
+ mr->desc_size = sizeof(struct mlx5_mtt);
mr->max_descs = ndescs;
} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
if (err)
goto err_destroy_psv;
+ mr->mmkey.type = MLX5_MKEY_MR;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
mr->umem = NULL;
if (err)
goto free;
+ mw->mmkey.type = MLX5_MKEY_MW;
mw->ibmw.rkey = mw->mmkey.key;
resp.response_length = min(offsetof(typeof(resp), response_length) +
if (!in_mad || !out_mad)
goto out;
- memset(props, 0, sizeof *props);
+ /* props being zeroed by the caller, avoid zeroing it here */
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
return -ERESTARTSYS;
- err = mthca_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
struct ib_port_attr attr;
int err;
- err = mthca_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.num_comp_vectors = 1;
- dev->ib_dev.dma_device = &dev->pdev->dev;
+ dev->ib_dev.dev.parent = &dev->pdev->dev;
dev->ib_dev.query_device = mthca_query_device;
dev->ib_dev.query_port = mthca_query_port;
dev->ib_dev.modify_device = mthca_modify_device;
struct nes_vnic *nesvnic = to_nesvnic(ibdev);
struct net_device *netdev = nesvnic->netdev;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
-
- if (netdev->mtu >= 4096)
- props->active_mtu = IB_MTU_4096;
- else if (netdev->mtu >= 2048)
- props->active_mtu = IB_MTU_2048;
- else if (netdev->mtu >= 1024)
- props->active_mtu = IB_MTU_1024;
- else if (netdev->mtu >= 512)
- props->active_mtu = IB_MTU_512;
- else
- props->active_mtu = IB_MTU_256;
+ props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
props->lid = 1;
props->lmc = 0;
struct ib_port_attr attr;
int err;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
err = nes_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
nesibdev->ibdev.phys_port_cnt = 1;
nesibdev->ibdev.num_comp_vectors = 1;
- nesibdev->ibdev.dma_device = &nesdev->pcidev->dev;
nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev;
nesibdev->ibdev.query_device = nes_query_device;
nesibdev->ibdev.query_port = nes_query_port;
int err;
dev = get_ocrdma_dev(ibdev);
- err = ocrdma_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (ocrdma_is_udp_encap_supported(dev))
+ immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
- if (ocrdma_is_udp_encap_supported(dev))
- immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
dev->ibdev.mmap = ocrdma_mmap;
- dev->ibdev.dma_device = &dev->nic_info.pdev->dev;
+ dev->ibdev.dev.parent = &dev->nic_info.pdev->dev;
dev->ibdev.process_mad = ocrdma_process_mad;
dev->ibdev.get_port_immutable = ocrdma_port_immutable;
dev->ibdev.get_port_immutable = qedr_port_immutable;
dev->ibdev.get_netdev = qedr_get_netdev;
- dev->ibdev.dma_device = &dev->pdev->dev;
+ dev->ibdev.dev.parent = &dev->pdev->dev;
dev->ibdev.get_link_layer = qedr_link_layer;
dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
return 0;
}
-void qedr_unaffiliated_event(void *context,
- u8 event_code)
+void qedr_unaffiliated_event(void *context, u8 event_code)
{
pr_err("unaffiliated event not implemented yet\n");
}
if (device_create_file(&dev->ibdev.dev, qedr_attributes[i]))
goto sysfs_err;
+ if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state))
+ qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE);
+
DP_DEBUG(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n");
return dev;
ib_dealloc_device(&dev->ibdev);
}
-static int qedr_close(struct qedr_dev *dev)
+static void qedr_close(struct qedr_dev *dev)
{
- qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
-
- return 0;
+ if (test_and_clear_bit(QEDR_ENET_STATE_BIT, &dev->enet_state))
+ qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ERR);
}
static void qedr_shutdown(struct qedr_dev *dev)
qedr_remove(dev);
}
+static void qedr_open(struct qedr_dev *dev)
+{
+ if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state))
+ qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE);
+}
+
static void qedr_mac_address_change(struct qedr_dev *dev)
{
union ib_gid *sgid = &dev->sgid_tbl[0];
ether_addr_copy(dev->gsi_ll2_mac_address, dev->ndev->dev_addr);
- qedr_ib_dispatch_event(dev, 1, IB_EVENT_GID_CHANGE);
+ qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_GID_CHANGE);
if (rc)
DP_ERR(dev, "Error updating mac filter\n");
{
switch (event) {
case QEDE_UP:
- qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
+ qedr_open(dev);
break;
case QEDE_DOWN:
qedr_close(dev);
struct rvt_sge *sge = &ss->sge;
while (length) {
- u32 len = sge->length;
+ u32 len = rvt_get_sge_length(sge, length);
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- BUG_ON(len == 0);
+ WARN_ON_ONCE(len == 0);
memcpy(sge->vaddr, data, len);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
+ rvt_update_sge(ss, len, release);
data += len;
length -= len;
}
}
-/**
- * qib_skip_sge - skip over SGE memory - XXX almost dup of prev func
- * @ss: the SGE state
- * @length: the number of bytes to skip
- */
-void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
-{
- struct rvt_sge *sge = &ss->sge;
-
- while (length) {
- u32 len = sge->length;
-
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- BUG_ON(len == 0);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- length -= len;
- }
-}
-
/*
* Count the number of DMA descriptors needed to send length bytes of data.
* Don't modify the qib_sge_state to get the count.
}
}
-static void update_sge(struct rvt_sge_state *ss, u32 length)
-{
- struct rvt_sge *sge = &ss->sge;
-
- sge->vaddr += length;
- sge->length -= length;
- sge->sge_length -= length;
- if (sge->sge_length == 0) {
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- return;
- sge->n = 0;
- }
- sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
- }
-}
-
#ifdef __LITTLE_ENDIAN
static inline u32 get_upper_bits(u32 data, u32 shift)
{
data = clear_upper_bytes(v, extra, 0);
}
}
- update_sge(ss, len);
+ rvt_update_sge(ss, len, false);
length -= len;
}
/* Update address before sending packet. */
- update_sge(ss, length);
+ rvt_update_sge(ss, length, false);
if (flush_wc) {
/* must flush early everything before trigger word */
qib_flush_wc();
u32 *addr = (u32 *) ss->sge.vaddr;
/* Update address before sending packet. */
- update_sge(ss, len);
+ rvt_update_sge(ss, len, false);
if (flush_wc) {
qib_pio_copy(piobuf, addr, dwords - 1);
/* must flush early everything before trigger word */
enum ib_mtu mtu;
u16 lid = ppd->lid;
+ /* props being zeroed by the caller, avoid zeroing it here */
props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
props->lmc = ppd->lmc;
props->state = dd->f_iblink_state(ppd->lastibcstat);
ibdev->owner = THIS_MODULE;
ibdev->node_guid = ppd->guid;
ibdev->phys_port_cnt = dd->num_pports;
- ibdev->dma_device = &dd->pcidev->dev;
+ ibdev->dev.parent = &dd->pcidev->dev;
ibdev->modify_device = qib_modify_device;
ibdev->process_mad = qib_process_mad;
dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue;
dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp;
+ dd->verbs_dev.rdi.driver_f.notify_restart_rc = qib_restart_rc;
dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
struct ib_port_attr attr;
int err;
- err = usnic_ib_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_USNIC;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP;
us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT;
us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
- us_ibdev->ib_dev.dma_device = &dev->dev;
+ us_ibdev->ib_dev.dev.parent = &dev->dev;
us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
struct ib_port_attr attr;
int err;
- err = pvrdma_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
dev->flags = 0;
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.num_comp_vectors = 1;
- dev->ib_dev.dma_device = &dev->pdev->dev;
+ dev->ib_dev.dev.parent = &dev->pdev->dev;
dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");
- if (dev->intr.type != PVRDMA_INTR_TYPE_MSIX) {
+ if (!dev->pdev->msix_enabled) {
/* Legacy intr */
icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
if (icr == 0)
return IRQ_HANDLED;
}
-static void pvrdma_disable_msi_all(struct pvrdma_dev *dev)
-{
- if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX)
- pci_disable_msix(dev->pdev);
- else if (dev->intr.type == PVRDMA_INTR_TYPE_MSI)
- pci_disable_msi(dev->pdev);
-}
-
static void pvrdma_free_irq(struct pvrdma_dev *dev)
{
int i;
dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
-
- if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) {
- for (i = 0; i < dev->intr.size; i++) {
- if (dev->intr.enabled[i]) {
- free_irq(dev->intr.msix_entry[i].vector, dev);
- dev->intr.enabled[i] = 0;
- }
- }
- } else if (dev->intr.type == PVRDMA_INTR_TYPE_INTX ||
- dev->intr.type == PVRDMA_INTR_TYPE_MSI) {
- free_irq(dev->pdev->irq, dev);
- }
+ for (i = 0; i < dev->nr_vectors; i++)
+ free_irq(pci_irq_vector(dev->pdev, i), dev);
}
static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
}
-static int pvrdma_enable_msix(struct pci_dev *pdev, struct pvrdma_dev *dev)
-{
- int i;
- int ret;
-
- for (i = 0; i < PVRDMA_MAX_INTERRUPTS; i++) {
- dev->intr.msix_entry[i].entry = i;
- dev->intr.msix_entry[i].vector = i;
-
- switch (i) {
- case 0:
- /* CMD ring handler */
- dev->intr.handler[i] = pvrdma_intr0_handler;
- break;
- case 1:
- /* Async event ring handler */
- dev->intr.handler[i] = pvrdma_intr1_handler;
- break;
- default:
- /* Completion queue handler */
- dev->intr.handler[i] = pvrdma_intrx_handler;
- break;
- }
- }
-
- ret = pci_enable_msix(pdev, dev->intr.msix_entry,
- PVRDMA_MAX_INTERRUPTS);
- if (!ret) {
- dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
- dev->intr.size = PVRDMA_MAX_INTERRUPTS;
- } else if (ret > 0) {
- ret = pci_enable_msix(pdev, dev->intr.msix_entry, ret);
- if (!ret) {
- dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
- dev->intr.size = ret;
- } else {
- dev->intr.size = 0;
- }
- }
-
- dev_dbg(&pdev->dev, "using interrupt type %d, size %d\n",
- dev->intr.type, dev->intr.size);
-
- return ret;
-}
-
static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
{
- int ret = 0;
- int i;
+ struct pci_dev *pdev = dev->pdev;
+ int ret = 0, i;
- if (pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX) &&
- pvrdma_enable_msix(dev->pdev, dev)) {
- /* Try MSI */
- ret = pci_enable_msi(dev->pdev);
- if (!ret) {
- dev->intr.type = PVRDMA_INTR_TYPE_MSI;
- } else {
- /* Legacy INTR */
- dev->intr.type = PVRDMA_INTR_TYPE_INTX;
- }
+ ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS,
+ PCI_IRQ_MSIX);
+ if (ret < 0) {
+ ret = pci_alloc_irq_vectors(pdev, 1, 1,
+ PCI_IRQ_MSI | PCI_IRQ_LEGACY);
+ if (ret < 0)
+ return ret;
}
+ dev->nr_vectors = ret;
- /* Request First IRQ */
- switch (dev->intr.type) {
- case PVRDMA_INTR_TYPE_INTX:
- case PVRDMA_INTR_TYPE_MSI:
- ret = request_irq(dev->pdev->irq, pvrdma_intr0_handler,
- IRQF_SHARED, DRV_NAME, dev);
- if (ret) {
- dev_err(&dev->pdev->dev,
- "failed to request interrupt\n");
- goto disable_msi;
- }
- break;
- case PVRDMA_INTR_TYPE_MSIX:
- ret = request_irq(dev->intr.msix_entry[0].vector,
- pvrdma_intr0_handler, 0, DRV_NAME, dev);
- if (ret) {
- dev_err(&dev->pdev->dev,
- "failed to request interrupt 0\n");
- goto disable_msi;
- }
- dev->intr.enabled[0] = 1;
- break;
- default:
- /* Not reached */
- break;
+ ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler,
+ pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "failed to request interrupt 0\n");
+ goto out_free_vectors;
}
- /* For MSIX: request intr for each vector */
- if (dev->intr.size > 1) {
- ret = request_irq(dev->intr.msix_entry[1].vector,
- pvrdma_intr1_handler, 0, DRV_NAME, dev);
+ for (i = 1; i < dev->nr_vectors; i++) {
+ ret = request_irq(pci_irq_vector(dev->pdev, i),
+ i == 1 ? pvrdma_intr1_handler :
+ pvrdma_intrx_handler,
+ 0, DRV_NAME, dev);
if (ret) {
dev_err(&dev->pdev->dev,
- "failed to request interrupt 1\n");
- goto free_irq;
- }
- dev->intr.enabled[1] = 1;
-
- for (i = 2; i < dev->intr.size; i++) {
- ret = request_irq(dev->intr.msix_entry[i].vector,
- pvrdma_intrx_handler, 0,
- DRV_NAME, dev);
- if (ret) {
- dev_err(&dev->pdev->dev,
- "failed to request interrupt %d\n", i);
- goto free_irq;
- }
- dev->intr.enabled[i] = 1;
+ "failed to request interrupt %d\n", i);
+ goto free_irqs;
}
}
return 0;
-free_irq:
- pvrdma_free_irq(dev);
-disable_msi:
- pvrdma_disable_msi_all(dev);
+free_irqs:
+ while (--i >= 0)
+ free_irq(pci_irq_vector(dev->pdev, i), dev);
+out_free_vectors:
+ pci_free_irq_vectors(pdev);
return ret;
}
if (ret) {
dev_err(&pdev->dev, "failed to allocate interrupts\n");
ret = -ENOMEM;
- goto err_netdevice;
+ goto err_free_cq_ring;
}
/* Allocate UAR table. */
pvrdma_uar_table_cleanup(dev);
err_free_intrs:
pvrdma_free_irq(dev);
- pvrdma_disable_msi_all(dev);
-err_netdevice:
- unregister_netdevice_notifier(&dev->nb_netdev);
+ pci_free_irq_vectors(pdev);
err_free_cq_ring:
pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
err_free_async_ring:
pvrdma_disable_intrs(dev);
pvrdma_free_irq(dev);
- pvrdma_disable_msi_all(dev);
+ pci_free_irq_vectors(pdev);
/* Deactivate pvrdma device */
pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
#
obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o
- rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o \
-rdmavt-y := vt.o ah.o cq.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o \
- trace.o
++rdmavt-y := vt.o ah.o cq.o mad.o mcast.o mmap.o mr.o pd.o qp.o \
+ rc.o srq.o trace.o
CFLAGS_trace.o = -I$(src)
mr->mapsz = 0;
while (i)
kfree(mr->map[--i]);
+ percpu_ref_exit(&mr->refcount);
+}
+
+static void __rvt_mregion_complete(struct percpu_ref *ref)
+{
+ struct rvt_mregion *mr = container_of(ref, struct rvt_mregion,
+ refcount);
+
+ complete(&mr->comp);
}
static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
- int count)
+ int count, unsigned int percpu_flags)
{
int m, i = 0;
struct rvt_dev_info *dev = ib_to_rvt(pd->device);
for (; i < m; i++) {
mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
dev->dparms.node);
- if (!mr->map[i]) {
- rvt_deinit_mregion(mr);
- return -ENOMEM;
- }
+ if (!mr->map[i])
+ goto bail;
mr->mapsz++;
}
init_completion(&mr->comp);
/* count returning the ptr to user */
- atomic_set(&mr->refcount, 1);
+ if (percpu_ref_init(&mr->refcount, &__rvt_mregion_complete,
+ percpu_flags, GFP_KERNEL))
+ goto bail;
+
atomic_set(&mr->lkey_invalid, 0);
mr->pd = pd;
mr->max_segs = count;
return 0;
+bail:
+ rvt_deinit_mregion(mr);
+ return -ENOMEM;
}
/**
if (!tmr) {
rcu_assign_pointer(dev->dma_mr, mr);
mr->lkey_published = 1;
- } else {
- rvt_put_mr(mr);
+ rvt_get_mr(mr);
}
goto success;
}
int freed = 0;
spin_lock_irqsave(&rkt->lock, flags);
- if (!mr->lkey_published)
- goto out;
- if (lkey == 0) {
- RCU_INIT_POINTER(dev->dma_mr, NULL);
+ if (!lkey) {
+ if (mr->lkey_published) {
+ RCU_INIT_POINTER(dev->dma_mr, NULL);
+ rvt_put_mr(mr);
+ }
} else {
+ if (!mr->lkey_published)
+ goto out;
r = lkey >> (32 - dev->dparms.lkey_table_size);
RCU_INIT_POINTER(rkt->table[r], NULL);
}
spin_unlock_irqrestore(&rkt->lock, flags);
if (freed) {
synchronize_rcu();
- rvt_put_mr(mr);
+ percpu_ref_kill(&mr->refcount);
}
}
if (!mr)
goto bail;
- rval = rvt_init_mregion(&mr->mr, pd, count);
+ rval = rvt_init_mregion(&mr->mr, pd, count, 0);
if (rval)
goto bail;
/*
static void __rvt_free_mr(struct rvt_mr *mr)
{
- rvt_deinit_mregion(&mr->mr);
rvt_free_lkey(&mr->mr);
+ rvt_deinit_mregion(&mr->mr);
kfree(mr);
}
* @acc: access flags
*
* Return: the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see dma.c).
+ * Note that all DMA addresses should be created via the functions in
+ * struct dma_virt_ops.
*/
struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
{
goto bail;
}
- rval = rvt_init_mregion(&mr->mr, pd, 0);
+ rval = rvt_init_mregion(&mr->mr, pd, 0, 0);
if (rval) {
ret = ERR_PTR(rval);
goto bail;
timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
if (!timeout) {
rvt_pr_err(rdi,
- "rvt_dereg_mr timeout mr %p pd %p refcount %u\n",
- mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
+ "rvt_dereg_mr timeout mr %p pd %p\n",
+ mr, mr->mr.pd);
rvt_get_mr(&mr->mr);
ret = -EBUSY;
goto out;
if (!fmr)
goto bail;
- rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
+ rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages,
+ PERCPU_REF_INIT_ATOMIC);
if (rval)
goto bail;
struct rvt_fmr *fmr = to_ifmr(ibfmr);
struct rvt_lkey_table *rkt;
unsigned long flags;
- int m, n, i;
+ int m, n;
+ unsigned long i;
u32 ps;
struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);
- i = atomic_read(&fmr->mr.refcount);
+ i = atomic_long_read(&fmr->mr.refcount.count);
if (i > 2)
return -EBUSY;
/*
* We use LKEY == zero for kernel virtual addresses
- * (see rvt_get_dma_mr and dma.c).
+ * (see rvt_get_dma_mr() and dma_virt_ops).
*/
rcu_read_lock();
if (sge->lkey == 0) {
/*
* We use RKEY == zero for kernel virtual addresses
- * (see rvt_get_dma_mr and dma.c).
+ * (see rvt_get_dma_mr() and dma_virt_ops).
*/
rcu_read_lock();
if (rkey == 0) {
#include <linux/module.h>
#include <linux/kernel.h>
+ #include <linux/dma-mapping.h>
#include "vt.h"
#include "trace.h"
return -EINVAL;
rvp = rdi->ports[port_index];
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->sm_lid = rvp->sm_lid;
props->sm_sl = rvp->sm_sl;
props->port_cap_flags = rvp->port_cap_flags;
if (port_index < 0)
return -EINVAL;
- err = rvt_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = rdi->dparms.core_cap_flags;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = rdi->dparms.core_cap_flags;
immutable->max_mad_size = rdi->dparms.max_mad_size;
return 0;
}
/* DMA Operations */
- rdi->ibdev.dma_ops =
- rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops;
+ rdi->ibdev.dev.dma_ops = rdi->ibdev.dev.dma_ops ? : &dma_virt_ops;
/* Protection Domain */
spin_lock_init(&rdi->n_pds_lock);
* SOFTWARE.
*/
+ #include <linux/dma-mapping.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
port = &rxe->port;
+ /* *attr being zeroed by the caller, avoid zeroing it here */
*attr = port->attr;
mutex_lock(&rxe->usdev_lock);
struct rxe_port *port;
if (unlikely(port_num != 1)) {
- dev_warn(device->dma_device, "invalid port_num = %d\n",
+ dev_warn(device->dev.parent, "invalid port_num = %d\n",
port_num);
goto err1;
}
port = &rxe->port;
if (unlikely(index >= port->attr.pkey_tbl_len)) {
- dev_warn(device->dma_device, "invalid index = %d\n",
+ dev_warn(device->dev.parent, "invalid index = %d\n",
index);
goto err1;
}
int err;
struct ib_port_attr attr;
- err = rxe_query_port(dev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ err = ib_query_port(dev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
- dev->dma_device = rxe_dma_device(rxe);
+ dev->dev.parent = rxe_dma_device(rxe);
dev->local_dma_lkey = 0;
dev->node_guid = rxe_node_guid(rxe);
- dev->dma_ops = &rxe_dma_mapping_ops;
+ dev->dev.dma_ops = &dma_virt_ops;
dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
goto err_disable;
}
- if (ipoib_ib_dev_up(dev))
- goto err_stop;
+ ipoib_ib_dev_up(dev);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
return 0;
-err_stop:
- ipoib_ib_dev_stop(dev);
-
err_disable:
clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
return ret;
}
+static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
+{
+ struct ipoib_pseudo_header *phdr;
+
+ phdr = (struct ipoib_pseudo_header *)skb_push(skb, sizeof(*phdr));
+ memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
+}
+
void ipoib_flush_paths(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
}
if (skb_queue_len(&neigh->queue) <
IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, IPOIB_PSEUDO_LEN);
+ push_pseudo_header(skb, neigh->daddr);
__skb_queue_tail(&neigh->queue, skb);
} else {
ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
if (!path->query && path_rec_start(dev, path))
goto err_path;
- if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+ if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ push_pseudo_header(skb, neigh->daddr);
__skb_queue_tail(&neigh->queue, skb);
- else
+ } else {
goto err_drop;
+ }
}
spin_unlock_irqrestore(&priv->lock, flags);
}
if (path) {
if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, IPOIB_PSEUDO_LEN);
+ push_pseudo_header(skb, phdr->hwaddr);
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
return;
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, IPOIB_PSEUDO_LEN);
+ push_pseudo_header(skb, phdr->hwaddr);
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
}
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, sizeof(*phdr));
+ push_pseudo_header(skb, phdr->hwaddr);
spin_lock_irqsave(&priv->lock, flags);
__skb_queue_tail(&neigh->queue, skb);
spin_unlock_irqrestore(&priv->lock, flags);
unsigned short type,
const void *daddr, const void *saddr, unsigned len)
{
- struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
header = (struct ipoib_header *) skb_push(skb, sizeof *header);
* destination address into skb hard header so we can figure out where
* to send the packet later.
*/
- phdr = (struct ipoib_pseudo_header *) skb_push(skb, sizeof(*phdr));
- memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
+ push_pseudo_header(skb, daddr);
return IPOIB_HARD_LEN;
}
return device_create_file(&dev->dev, &dev_attr_pkey);
}
-int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
+void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
{
priv->hca_caps = hca->attrs.device_cap_flags;
priv->dev->features |= priv->dev->hw_features;
}
-
- return 0;
}
static struct net_device *ipoib_add_port(const char *format,
if (!priv)
goto alloc_mem_failed;
- SET_NETDEV_DEV(priv->dev, hca->dma_device);
+ SET_NETDEV_DEV(priv->dev, hca->dev.parent);
priv->dev->dev_id = port - 1;
result = ib_query_port(hca, port, &attr);
goto device_init_failed;
}
- result = ipoib_set_dev_features(priv, hca);
- if (result)
- goto device_init_failed;
+ ipoib_set_dev_features(priv, hca);
/*
* Set the full membership bit, so that we join the right
SHOST_DIX_GUARD_CRC);
}
- /*
- * Limit the sg_tablesize and max_sectors based on the device
- * max fastreg page list length.
- */
- shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize,
- ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len);
-
if (iscsi_host_add(shost,
- ib_conn->device->ib_device->dma_device)) {
+ ib_conn->device->ib_device->dev.parent)) {
mutex_unlock(&iser_conn->state_mutex);
goto free_host;
}
max_fr_sectors = ((shost->sg_tablesize - 1) * PAGE_SIZE) >> 9;
shost->max_sectors = min(iser_max_sectors, max_fr_sectors);
+ iser_dbg("iser_conn %p, sg_tablesize %u, max_sectors %u\n",
+ iser_conn, shost->sg_tablesize,
+ shost->max_sectors);
+
if (cmds_max > max_cmds) {
iser_info("cmds_max changed from %u to %u\n",
cmds_max, max_cmds);
.change_queue_depth = scsi_change_queue_depth,
.sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE,
.cmd_per_lun = ISER_DEF_CMD_PER_LUN,
+ .eh_timed_out = iscsi_eh_cmd_timed_out,
.eh_abort_handler = iscsi_eh_abort,
.eh_device_reset_handler= iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
+#include <linux/lockdep.h>
#include <rdma/ib_cache.h>
#include <linux/atomic.h>
* completion handler can access the queue pair while it is
* being destroyed.
*/
-static void srp_destroy_qp(struct ib_qp *qp)
+static void srp_destroy_qp(struct srp_rdma_ch *ch, struct ib_qp *qp)
{
- ib_drain_rq(qp);
+ spin_lock_irq(&ch->lock);
+ ib_process_cq_direct(ch->send_cq, -1);
+ spin_unlock_irq(&ch->lock);
+
+ ib_drain_qp(qp);
ib_destroy_qp(qp);
}
}
if (ch->qp)
- srp_destroy_qp(ch->qp);
+ srp_destroy_qp(ch, ch->qp);
if (ch->recv_cq)
ib_free_cq(ch->recv_cq);
if (ch->send_cq)
return 0;
err_qp:
- srp_destroy_qp(qp);
+ srp_destroy_qp(ch, qp);
err_send_cq:
ib_free_cq(send_cq);
ib_destroy_fmr_pool(ch->fmr_pool);
}
- srp_destroy_qp(ch->qp);
+ srp_destroy_qp(ch, ch->qp);
ib_free_cq(ch->send_cq);
ib_free_cq(ch->recv_cq);
s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
struct srp_iu *iu;
+ lockdep_assert_held(&ch->lock);
+
ib_process_cq_direct(ch->send_cq, -1);
if (list_empty(&ch->free_tx))
return iu;
}
+/*
+ * Note: if this function is called from inside ib_drain_sq() then it will
+ * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
+ * with status IB_WC_SUCCESS then that's a bug.
+ */
static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
return;
}
+ lockdep_assert_held(&ch->lock);
+
list_add(&iu->list, &ch->free_tx);
}
if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
spin_lock_irqsave(&ch->lock, flags);
ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
+ if (rsp->tag == ch->tsk_mgmt_tag) {
+ ch->tsk_mgmt_status = -1;
+ if (be32_to_cpu(rsp->resp_data_len) >= 4)
+ ch->tsk_mgmt_status = rsp->data[3];
+ complete(&ch->tsk_mgmt_done);
+ } else {
+ shost_printk(KERN_ERR, target->scsi_host,
+ "Received tsk mgmt response too late for tag %#llx\n",
+ rsp->tag);
+ }
spin_unlock_irqrestore(&ch->lock, flags);
-
- ch->tsk_mgmt_status = -1;
- if (be32_to_cpu(rsp->resp_data_len) >= 4)
- ch->tsk_mgmt_status = rsp->data[3];
- complete(&ch->tsk_mgmt_done);
} else {
scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
- if (scmnd) {
+ if (scmnd && scmnd->host_scribble) {
req = (void *)scmnd->host_scribble;
scmnd = srp_claim_req(ch, req, NULL, scmnd);
+ } else {
+ scmnd = NULL;
}
if (!scmnd) {
shost_printk(KERN_ERR, target->scsi_host,
}
static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
- u8 func)
+ u8 func, u8 *status)
{
struct srp_target_port *target = ch->target;
struct srp_rport *rport = target->rport;
struct ib_device *dev = target->srp_host->srp_dev->dev;
struct srp_iu *iu;
struct srp_tsk_mgmt *tsk_mgmt;
+ int res;
if (!ch->connected || target->qp_in_error)
return -1;
- init_completion(&ch->tsk_mgmt_done);
-
/*
* Lock the rport mutex to avoid that srp_create_ch_ib() is
* invoked while a task management function is being sent.
tsk_mgmt->opcode = SRP_TSK_MGMT;
int_to_scsilun(lun, &tsk_mgmt->lun);
- tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
tsk_mgmt->tsk_mgmt_func = func;
tsk_mgmt->task_tag = req_tag;
+ spin_lock_irq(&ch->lock);
+ ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
+ tsk_mgmt->tag = ch->tsk_mgmt_tag;
+ spin_unlock_irq(&ch->lock);
+
+ init_completion(&ch->tsk_mgmt_done);
+
ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
DMA_TO_DEVICE);
if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
return -1;
}
+ res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
+ msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
+ if (res > 0 && status)
+ *status = ch->tsk_mgmt_status;
mutex_unlock(&rport->mutex);
- if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
- msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
- return -1;
+ WARN_ON_ONCE(res < 0);
- return 0;
+ return res > 0 ? 0 : -1;
}
static int srp_abort(struct scsi_cmnd *scmnd)
shost_printk(KERN_ERR, target->scsi_host,
"Sending SRP abort for tag %#x\n", tag);
if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
- SRP_TSK_ABORT_TASK) == 0)
+ SRP_TSK_ABORT_TASK, NULL) == 0)
ret = SUCCESS;
else if (target->rport->state == SRP_RPORT_LOST)
ret = FAST_IO_FAIL;
struct srp_target_port *target = host_to_target(scmnd->device->host);
struct srp_rdma_ch *ch;
int i;
+ u8 status;
shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
ch = &target->ch[0];
if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
- SRP_TSK_LUN_RESET))
+ SRP_TSK_LUN_RESET, &status))
return FAILED;
- if (ch->tsk_mgmt_status)
+ if (status)
return FAILED;
for (i = 0; i < target->ch_count; i++) {
struct Scsi_Host *shost = sdev->host;
struct srp_target_port *target = host_to_target(shost);
struct srp_device *srp_dev = target->srp_host->srp_dev;
- struct ib_device *ibdev = srp_dev->dev;
- if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+ if (true)
blk_queue_virt_boundary(sdev->request_queue,
~srp_dev->mr_page_mask);
.info = srp_target_info,
.queuecommand = srp_queuecommand,
.change_queue_depth = srp_change_queue_depth,
+ .eh_timed_out = srp_timed_out,
.eh_abort_handler = srp_abort,
.eh_device_reset_handler = srp_reset_device,
.eh_host_reset_handler = srp_reset_host,
sprintf(target->target_name, "SRP.T10:%016llX",
be64_to_cpu(target->id_ext));
- if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
+ if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
return -ENODEV;
memcpy(ids.port_id, &target->id_ext, 8);
ret = srp_connect_ch(ch, multich);
if (ret) {
shost_printk(KERN_ERR, target->scsi_host,
- PFX "Connection %d/%d failed\n",
+ PFX "Connection %d/%d to %pI6 failed\n",
ch_start + cpu_idx,
- target->ch_count);
+ target->ch_count,
+ ch->target->orig_dgid.raw);
if (node_idx == 0 && cpu_idx == 0) {
- goto err_disconnect;
+ goto free_ch;
} else {
srp_free_ch_ib(target, ch);
srp_free_req_data(target, ch);
err_disconnect:
srp_disconnect_target(target);
+free_ch:
for (i = 0; i < target->ch_count; i++) {
ch = &target->ch[i];
srp_free_ch_ib(target, ch);
host->port = port;
host->dev.class = &srp_class;
- host->dev.parent = device->dev->dma_device;
+ host->dev.parent = device->dev->dev.parent;
dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
if (device_register(&host->dev))
indirect_sg_entries = cmd_sg_entries;
}
+ if (indirect_sg_entries > SG_MAX_SEGMENTS) {
+ pr_warn("Clamping indirect_sg_entries to %u\n",
+ SG_MAX_SEGMENTS);
+ indirect_sg_entries = SG_MAX_SEGMENTS;
+ }
+
srp_remove_wq = create_workqueue("srp_remove");
if (!srp_remove_wq) {
ret = -ENOMEM;
struct ib_mad_reg_req reg_req;
struct ib_port_modify port_modify;
struct ib_port_attr port_attr;
+ __be16 *guid;
int ret;
memset(&port_modify, 0, sizeof(port_modify));
if (ret)
goto err_query_port;
+ sport->port_guid_wwn.priv = sport;
+ guid = (__be16 *)&sport->gid.global.interface_id;
snprintf(sport->port_guid, sizeof(sport->port_guid),
- "0x%016llx%016llx",
- be64_to_cpu(sport->gid.global.subnet_prefix),
- be64_to_cpu(sport->gid.global.interface_id));
+ "%04x:%04x:%04x:%04x",
+ be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
+ be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
+ sport->port_gid_wwn.priv = sport;
+ snprintf(sport->port_gid, sizeof(sport->port_gid),
+ "0x%016llx%016llx",
+ be64_to_cpu(sport->gid.global.subnet_prefix),
+ be64_to_cpu(sport->gid.global.interface_id));
if (!sport->mad_agent) {
memset(®_req, 0, sizeof(reg_req));
struct srp_login_rej *rej;
struct ib_cm_rep_param *rep_param;
struct srpt_rdma_ch *ch, *tmp_ch;
+ __be16 *guid;
u32 it_iu_len;
int i, ret = 0;
goto destroy_ib;
}
- /*
- * Use the initator port identifier as the session name, when
- * checking against se_node_acl->initiatorname[] this can be
- * with or without preceeding '0x'.
- */
+ guid = (__be16 *)¶m->primary_path->sgid.global.interface_id;
+ snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x",
+ be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
+ be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
snprintf(ch->sess_name, sizeof(ch->sess_name), "0x%016llx%016llx",
be64_to_cpu(*(__be64 *)ch->i_port_id),
be64_to_cpu(*(__be64 *)(ch->i_port_id + 8)));
pr_debug("registering session %s\n", ch->sess_name);
- ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
+ if (sport->port_guid_tpg.se_tpg_wwn)
+ ch->sess = target_alloc_session(&sport->port_guid_tpg, 0, 0,
+ TARGET_PROT_NORMAL,
+ ch->ini_guid, ch, NULL);
+ if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
+ ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
TARGET_PROT_NORMAL, ch->sess_name, ch,
NULL);
/* Retry without leading "0x" */
- if (IS_ERR(ch->sess))
- ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
+ if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
+ ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
TARGET_PROT_NORMAL,
ch->sess_name + 2, ch, NULL);
- if (IS_ERR(ch->sess)) {
+ if (IS_ERR_OR_NULL(ch->sess)) {
pr_info("Rejected login because no ACL has been configured yet for initiator %s.\n",
ch->sess_name);
rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ?
return 0;
}
-static struct srpt_port *__srpt_lookup_port(const char *name)
+static struct se_wwn *__srpt_lookup_wwn(const char *name)
{
struct ib_device *dev;
struct srpt_device *sdev;
for (i = 0; i < dev->phys_port_cnt; i++) {
sport = &sdev->port[i];
- if (!strcmp(sport->port_guid, name))
- return sport;
+ if (strcmp(sport->port_guid, name) == 0)
+ return &sport->port_guid_wwn;
+ if (strcmp(sport->port_gid, name) == 0)
+ return &sport->port_gid_wwn;
}
}
return NULL;
}
-static struct srpt_port *srpt_lookup_port(const char *name)
+static struct se_wwn *srpt_lookup_wwn(const char *name)
{
- struct srpt_port *sport;
+ struct se_wwn *wwn;
spin_lock(&srpt_dev_lock);
- sport = __srpt_lookup_port(name);
+ wwn = __srpt_lookup_wwn(name);
spin_unlock(&srpt_dev_lock);
- return sport;
+ return wwn;
}
/**
struct ib_srq_init_attr srq_attr;
int i;
- pr_debug("device = %p, device->dma_ops = %p\n", device,
- device->dma_ops);
+ pr_debug("device = %p\n", device);
sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
if (!sdev)
return "srpt";
}
+static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg)
+{
+ return tpg->se_tpg_wwn->priv;
+}
+
static char *srpt_get_fabric_wwn(struct se_portal_group *tpg)
{
- struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(tpg);
- return sport->port_guid;
+ WARN_ON_ONCE(tpg != &sport->port_guid_tpg &&
+ tpg != &sport->port_gid_tpg);
+ return tpg == &sport->port_guid_tpg ? sport->port_guid :
+ sport->port_gid;
}
static u16 srpt_get_tag(struct se_portal_group *tpg)
return srpt_get_cmd_state(ioctx);
}
+static int srpt_parse_guid(u64 *guid, const char *name)
+{
+ u16 w[4];
+ int ret = -EINVAL;
+
+ if (sscanf(name, "%hx:%hx:%hx:%hx", &w[0], &w[1], &w[2], &w[3]) != 4)
+ goto out;
+ *guid = get_unaligned_be64(w);
+ ret = 0;
+out:
+ return ret;
+}
+
/**
* srpt_parse_i_port_id() - Parse an initiator port ID.
* @name: ASCII representation of a 128-bit initiator port ID.
*/
static int srpt_init_nodeacl(struct se_node_acl *se_nacl, const char *name)
{
+ u64 guid;
u8 i_port_id[16];
+ int ret;
- if (srpt_parse_i_port_id(i_port_id, name) < 0) {
+ ret = srpt_parse_guid(&guid, name);
+ if (ret < 0)
+ ret = srpt_parse_i_port_id(i_port_id, name);
+ if (ret < 0)
pr_err("invalid initiator port ID %s\n", name);
- return -EINVAL;
- }
- return 0;
+ return ret;
}
static ssize_t srpt_tpg_attrib_srp_max_rdma_size_show(struct config_item *item,
char *page)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size);
}
const char *page, size_t count)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long val;
int ret;
char *page)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size);
}
const char *page, size_t count)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long val;
int ret;
char *page)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size);
}
const char *page, size_t count)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long val;
int ret;
static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page)
{
struct se_portal_group *se_tpg = to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return snprintf(page, PAGE_SIZE, "%d\n", (sport->enabled) ? 1: 0);
}
const char *page, size_t count)
{
struct se_portal_group *se_tpg = to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
struct srpt_device *sdev = sport->sdev;
struct srpt_rdma_ch *ch;
unsigned long tmp;
struct config_group *group,
const char *name)
{
- struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
+ struct srpt_port *sport = wwn->priv;
+ static struct se_portal_group *tpg;
int res;
- /* Initialize sport->port_wwn and sport->port_tpg_1 */
- res = core_tpg_register(&sport->port_wwn, &sport->port_tpg_1, SCSI_PROTOCOL_SRP);
+ WARN_ON_ONCE(wwn != &sport->port_guid_wwn &&
+ wwn != &sport->port_gid_wwn);
+ tpg = wwn == &sport->port_guid_wwn ? &sport->port_guid_tpg :
+ &sport->port_gid_tpg;
+ res = core_tpg_register(wwn, tpg, SCSI_PROTOCOL_SRP);
if (res)
return ERR_PTR(res);
- return &sport->port_tpg_1;
+ return tpg;
}
/**
*/
static void srpt_drop_tpg(struct se_portal_group *tpg)
{
- struct srpt_port *sport = container_of(tpg,
- struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(tpg);
sport->enabled = false;
- core_tpg_deregister(&sport->port_tpg_1);
+ core_tpg_deregister(tpg);
}
/**
struct config_group *group,
const char *name)
{
- struct srpt_port *sport;
- int ret;
-
- sport = srpt_lookup_port(name);
- pr_debug("make_tport(%s)\n", name);
- ret = -EINVAL;
- if (!sport)
- goto err;
-
- return &sport->port_wwn;
-
-err:
- return ERR_PTR(ret);
+ return srpt_lookup_wwn(name) ? : ERR_PTR(-EINVAL);
}
/**
*/
static void srpt_drop_tport(struct se_wwn *wwn)
{
- struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
-
- pr_debug("drop_tport(%s\n", config_item_name(&sport->port_wwn.wwn_group.cg_item));
}
static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf)
* Domain for untranslated devices - only allocated
* if iommu=pt passed on kernel cmd line.
*/
-static const struct iommu_ops amd_iommu_ops;
+const struct iommu_ops amd_iommu_ops;
static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
int amd_iommu_max_glx_val = -1;
- static struct dma_map_ops amd_iommu_dma_ops;
+ static const struct dma_map_ops amd_iommu_dma_ops;
/*
* This struct contains device specific data for the IOMMU
static int iommu_init_device(struct device *dev)
{
struct iommu_dev_data *dev_data;
+ struct amd_iommu *iommu;
int devid;
if (dev->archdata.iommu)
if (devid < 0)
return devid;
+ iommu = amd_iommu_rlookup_table[devid];
+
dev_data = find_dev_data(devid);
if (!dev_data)
return -ENOMEM;
dev->archdata.iommu = dev_data;
- iommu_device_link(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev,
- dev);
+ iommu_device_link(&iommu->iommu, dev);
return 0;
}
static void iommu_uninit_device(struct device *dev)
{
- int devid;
struct iommu_dev_data *dev_data;
+ struct amd_iommu *iommu;
+ int devid;
devid = get_device_id(dev);
if (devid < 0)
return;
+ iommu = amd_iommu_rlookup_table[devid];
+
dev_data = search_dev_data(devid);
if (!dev_data)
return;
if (dev_data->domain)
detach_device(dev);
- iommu_device_unlink(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev,
- dev);
+ iommu_device_unlink(&iommu->iommu, dev);
iommu_group_remove_device(dev);
/* Remove dma-ops */
- dev->archdata.dma_ops = NULL;
+ dev->dma_ops = NULL;
/*
* We keep dev_data around for unplugged devices and reuse it when the
dev_name(dev));
iommu_ignore_device(dev);
- dev->archdata.dma_ops = &nommu_dma_ops;
+ dev->dma_ops = &nommu_dma_ops;
goto out;
}
init_iommu_group(dev);
if (domain->type == IOMMU_DOMAIN_IDENTITY)
dev_data->passthrough = true;
else
- dev->archdata.dma_ops = &amd_iommu_dma_ops;
+ dev->dma_ops = &amd_iommu_dma_ops;
out:
iommu_completion_wait(iommu);
return NULL;
page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
- get_order(size));
+ get_order(size), flag);
if (!page)
return NULL;
}
return check_device(dev);
}
- static struct dma_map_ops amd_iommu_dma_ops = {
+ static const struct dma_map_ops amd_iommu_dma_ops = {
.alloc = alloc_coherent,
.free = free_coherent,
.map_page = map_page,
return false;
}
-static void amd_iommu_get_dm_regions(struct device *dev,
- struct list_head *head)
+static void amd_iommu_get_resv_regions(struct device *dev,
+ struct list_head *head)
{
+ struct iommu_resv_region *region;
struct unity_map_entry *entry;
int devid;
return;
list_for_each_entry(entry, &amd_iommu_unity_map, list) {
- struct iommu_dm_region *region;
+ size_t length;
+ int prot = 0;
if (devid < entry->devid_start || devid > entry->devid_end)
continue;
- region = kzalloc(sizeof(*region), GFP_KERNEL);
+ length = entry->address_end - entry->address_start;
+ if (entry->prot & IOMMU_PROT_IR)
+ prot |= IOMMU_READ;
+ if (entry->prot & IOMMU_PROT_IW)
+ prot |= IOMMU_WRITE;
+
+ region = iommu_alloc_resv_region(entry->address_start,
+ length, prot,
+ IOMMU_RESV_DIRECT);
if (!region) {
pr_err("Out of memory allocating dm-regions for %s\n",
dev_name(dev));
return;
}
-
- region->start = entry->address_start;
- region->length = entry->address_end - entry->address_start;
- if (entry->prot & IOMMU_PROT_IR)
- region->prot |= IOMMU_READ;
- if (entry->prot & IOMMU_PROT_IW)
- region->prot |= IOMMU_WRITE;
-
list_add_tail(®ion->list, head);
}
+
+ region = iommu_alloc_resv_region(MSI_RANGE_START,
+ MSI_RANGE_END - MSI_RANGE_START + 1,
+ 0, IOMMU_RESV_RESERVED);
+ if (!region)
+ return;
+ list_add_tail(®ion->list, head);
+
+ region = iommu_alloc_resv_region(HT_RANGE_START,
+ HT_RANGE_END - HT_RANGE_START + 1,
+ 0, IOMMU_RESV_RESERVED);
+ if (!region)
+ return;
+ list_add_tail(®ion->list, head);
}
-static void amd_iommu_put_dm_regions(struct device *dev,
+static void amd_iommu_put_resv_regions(struct device *dev,
struct list_head *head)
{
- struct iommu_dm_region *entry, *next;
+ struct iommu_resv_region *entry, *next;
list_for_each_entry_safe(entry, next, head, list)
kfree(entry);
}
-static void amd_iommu_apply_dm_region(struct device *dev,
+static void amd_iommu_apply_resv_region(struct device *dev,
struct iommu_domain *domain,
- struct iommu_dm_region *region)
+ struct iommu_resv_region *region)
{
struct dma_ops_domain *dma_dom = to_dma_ops_domain(to_pdomain(domain));
unsigned long start, end;
WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
}
-static const struct iommu_ops amd_iommu_ops = {
+const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.domain_alloc = amd_iommu_domain_alloc,
.domain_free = amd_iommu_domain_free,
.add_device = amd_iommu_add_device,
.remove_device = amd_iommu_remove_device,
.device_group = amd_iommu_device_group,
- .get_dm_regions = amd_iommu_get_dm_regions,
- .put_dm_regions = amd_iommu_put_dm_regions,
- .apply_dm_region = amd_iommu_apply_dm_region,
+ .get_resv_regions = amd_iommu_get_resv_regions,
+ .put_resv_regions = amd_iommu_put_resv_regions,
+ .apply_resv_region = amd_iommu_apply_resv_region,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
};
#define NVME_RDMA_MAX_INLINE_SEGMENTS 1
-static const char *const nvme_rdma_cm_status_strs[] = {
- [NVME_RDMA_CM_INVALID_LEN] = "invalid length",
- [NVME_RDMA_CM_INVALID_RECFMT] = "invalid record format",
- [NVME_RDMA_CM_INVALID_QID] = "invalid queue ID",
- [NVME_RDMA_CM_INVALID_HSQSIZE] = "invalid host SQ size",
- [NVME_RDMA_CM_INVALID_HRQSIZE] = "invalid host RQ size",
- [NVME_RDMA_CM_NO_RSC] = "resource not found",
- [NVME_RDMA_CM_INVALID_IRD] = "invalid IRD",
- [NVME_RDMA_CM_INVALID_ORD] = "Invalid ORD",
-};
-
-static const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
-{
- size_t index = status;
-
- if (index < ARRAY_SIZE(nvme_rdma_cm_status_strs) &&
- nvme_rdma_cm_status_strs[index])
- return nvme_rdma_cm_status_strs[index];
- else
- return "unrecognized reason";
-};
-
/*
* We handle AEN commands ourselves and don't even let the
* block layer know about them.
struct sockaddr addr;
struct sockaddr_in addr_in;
};
+ union {
+ struct sockaddr src_addr;
+ struct sockaddr_in src_addr_in;
+ };
struct nvme_ctrl ctrl;
};
int idx, size_t queue_size)
{
struct nvme_rdma_queue *queue;
+ struct sockaddr *src_addr = NULL;
int ret;
queue = &ctrl->queues[idx];
}
queue->cm_error = -ETIMEDOUT;
- ret = rdma_resolve_addr(queue->cm_id, NULL, &ctrl->addr,
+ if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
+ src_addr = &ctrl->src_addr;
+
+ ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr,
NVME_RDMA_CONNECT_TIMEOUT_MS);
if (ret) {
dev_info(ctrl->ctrl.device,
}
static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
- struct request *rq, unsigned int map_len,
- struct nvme_command *c)
+ struct request *rq, struct nvme_command *c)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
}
if (count == 1) {
- if (rq_data_dir(rq) == WRITE &&
- map_len <= nvme_rdma_inline_data_size(queue) &&
- nvme_rdma_queue_idx(queue))
+ if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
+ blk_rq_payload_bytes(rq) <=
+ nvme_rdma_inline_data_size(queue))
return nvme_rdma_map_sg_inline(queue, req, c);
if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
dev = nvme_rdma_find_get_device(queue->cm_id);
if (!dev) {
- dev_err(queue->cm_id->device->dma_device,
+ dev_err(queue->cm_id->device->dev.parent,
"no client data found!\n");
return -ECONNREFUSED;
}
struct request *rq)
{
if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
- struct nvme_command *cmd = (struct nvme_command *)rq->cmd;
+ struct nvme_command *cmd = nvme_req(rq)->cmd;
- if (rq->cmd_type != REQ_TYPE_DRV_PRIV ||
+ if (!blk_rq_is_passthrough(rq) ||
cmd->common.opcode != nvme_fabrics_command ||
cmd->fabrics.fctype != nvme_fabrics_type_connect)
return false;
struct nvme_command *c = sqe->data;
bool flush = false;
struct ib_device *dev;
- unsigned int map_len;
int ret;
WARN_ON_ONCE(rq->tag < 0);
blk_mq_start_request(rq);
- map_len = nvme_map_len(rq);
- ret = nvme_rdma_map_data(queue, rq, map_len, c);
+ ret = nvme_rdma_map_data(queue, rq, c);
if (ret < 0) {
dev_err(queue->ctrl->ctrl.device,
"Failed to map data (%d)\n", ret);
ib_dma_sync_single_for_device(dev, sqe->dma,
sizeof(struct nvme_command), DMA_TO_DEVICE);
- if (rq->cmd_type == REQ_TYPE_FS && req_op(rq) == REQ_OP_FLUSH)
+ if (req_op(rq) == REQ_OP_FLUSH)
flush = true;
ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
return;
}
- if (rq->cmd_type == REQ_TYPE_DRV_PRIV)
+ if (blk_rq_is_passthrough(rq))
error = rq->errors;
else
error = nvme_error_status(rq->errors);
goto out_free_ctrl;
}
+ if (opts->mask & NVMF_OPT_HOST_TRADDR) {
+ ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in,
+ opts->host_traddr);
+ if (ret) {
+ pr_err("malformed src IP address passed: %s\n",
+ opts->host_traddr);
+ goto out_free_ctrl;
+ }
+ }
+
if (opts->mask & NVMF_OPT_TRSVCID) {
u16 port;
static struct nvmf_transport_ops nvme_rdma_transport = {
.name = "rdma",
.required_opts = NVMF_OPT_TRADDR,
- .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY,
+ .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
+ NVMF_OPT_HOST_TRADDR,
.create_ctrl = nvme_rdma_create_ctrl,
};
return ret;
}
- nvmf_register_transport(&nvme_rdma_transport);
- return 0;
+ return nvmf_register_transport(&nvme_rdma_transport);
}
static void __exit nvme_rdma_cleanup_module(void)
*
* @suspend: Called when a device on this bus wants to go to sleep mode.
* @resume: Called to bring a device on this bus out of sleep mode.
+ * @num_vf: Called to find out how many virtual functions a device on this
+ * bus supports.
* @pm: Power management operations of this bus, callback the specific
* device driver's pm-ops.
* @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU
int (*suspend)(struct device *dev, pm_message_t state);
int (*resume)(struct device *dev);
+ int (*num_vf)(struct device *dev);
+
const struct dev_pm_ops *pm;
const struct iommu_ops *iommu_ops;
#ifdef CONFIG_NUMA
int numa_node; /* NUMA node this device is close to */
#endif
+ const struct dma_map_ops *dma_ops;
u64 *dma_mask; /* dma mask (if dma'able device) */
u64 coherent_dma_mask;/* Like dma_mask, but for
alloc_coherent mappings as
extern void lock_device_hotplug(void);
extern void unlock_device_hotplug(void);
extern int lock_device_hotplug_sysfs(void);
+void assert_held_device_hotplug(void);
extern int device_offline(struct device *dev);
extern int device_online(struct device *dev);
extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
extern void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
+static inline int dev_num_vf(struct device *dev)
+{
+ if (dev->bus && dev->bus->num_vf)
+ return dev->bus->num_vf(dev);
+ return 0;
+}
+
/*
* Root device objects for grouping under /sys/devices
*/
*/
#define DMA_ATTR_NO_WARN (1UL << 8)
+/*
+ * DMA_ATTR_PRIVILEGED: used to indicate that the buffer is fully
+ * accessible at an elevated privilege level (and ideally inaccessible or
+ * at least read-only at lesser-privileged levels).
+ */
+#define DMA_ATTR_PRIVILEGED (1UL << 9)
+
/*
* A dma_addr_t can hold any valid DMA or bus address for the platform.
* It can be given to a device to use as a DMA source or target. A CPU cannot
int is_phys;
};
- extern struct dma_map_ops dma_noop_ops;
+ extern const struct dma_map_ops dma_noop_ops;
+ extern const struct dma_map_ops dma_virt_ops;
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
#ifdef CONFIG_HAS_DMA
#include <asm/dma-mapping.h>
+ static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
+ {
+ if (dev && dev->dma_ops)
+ return dev->dma_ops;
+ return get_arch_dma_ops(dev ? dev->bus : NULL);
+ }
+
+ static inline void set_dma_ops(struct device *dev,
+ const struct dma_map_ops *dma_ops)
+ {
+ dev->dma_ops = dma_ops;
+ }
#else
/*
* Define the dma api to allow compilation but not linking of
* dma dependent code. Code that depends on the dma-mapping
* API needs to set 'depends on HAS_DMA' in its Kconfig
*/
- extern struct dma_map_ops bad_dma_ops;
- static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+ extern const struct dma_map_ops bad_dma_ops;
+ static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
{
return &bad_dma_ops;
}
enum dma_data_direction dir,
unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
dma_addr_t addr;
kmemcheck_mark_initialized(ptr, size);
enum dma_data_direction dir,
unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
if (ops->unmap_page)
int nents, enum dma_data_direction dir,
unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
int i, ents;
struct scatterlist *s;
int nents, enum dma_data_direction dir,
unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
debug_dma_unmap_sg(dev, sg, nents, dir);
enum dma_data_direction dir,
unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
dma_addr_t addr;
kmemcheck_mark_initialized(page_address(page) + offset, size);
enum dma_data_direction dir,
unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
if (ops->unmap_page)
enum dma_data_direction dir,
unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
dma_addr_t addr;
BUG_ON(!valid_dma_direction(dir));
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
if (ops->unmap_resource)
size_t size,
enum dma_data_direction dir)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
if (ops->sync_single_for_cpu)
dma_addr_t addr, size_t size,
enum dma_data_direction dir)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
if (ops->sync_single_for_device)
dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
if (ops->sync_sg_for_cpu)
dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
if (ops->sync_sg_for_device)
dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr,
dma_addr_t dma_addr, size_t size, unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!ops);
if (ops->mmap)
return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!ops);
if (ops->get_sgtable)
return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
dma_addr_t *dma_handle, gfp_t flag,
unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
void *cpu_addr;
BUG_ON(!ops);
void *cpu_addr, dma_addr_t dma_handle,
unsigned long attrs)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!ops);
WARN_ON(irqs_disabled());
#ifndef HAVE_ARCH_DMA_SUPPORTED
static inline int dma_supported(struct device *dev, u64 mask)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
if (!ops)
return 0;
#ifndef HAVE_ARCH_DMA_SET_MASK
static inline int dma_set_mask(struct device *dev, u64 mask)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
if (ops->set_dma_mask)
return ops->set_dma_mask(dev, mask);
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23),
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24),
IB_DEVICE_RC_IP_CSUM = (1 << 25),
+ /* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */
IB_DEVICE_RAW_IP_CSUM = (1 << 26),
/*
* Devices should set IB_DEVICE_CROSS_CHANNEL if they
IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31),
IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33),
+ /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34),
};
};
enum ib_odp_general_cap_bits {
- IB_ODP_SUPPORT = 1 << 0,
+ IB_ODP_SUPPORT = 1 << 0,
+ IB_ODP_SUPPORT_IMPLICIT = 1 << 1,
};
enum ib_odp_transport_cap_bits {
uint64_t hca_core_clock; /* in KHZ */
struct ib_rss_caps rss_caps;
u32 max_wq_type_rq;
+ u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */
};
enum ib_mtu {
}
}
+static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
+{
+ if (mtu >= 4096)
+ return IB_MTU_4096;
+ else if (mtu >= 2048)
+ return IB_MTU_2048;
+ else if (mtu >= 1024)
+ return IB_MTU_1024;
+ else if (mtu >= 512)
+ return IB_MTU_512;
+ else
+ return IB_MTU_256;
+}
+
enum ib_port_state {
IB_PORT_NOP = 0,
IB_PORT_DOWN = 1,
#define RDMA_CORE_CAP_PROT_ROCE 0x00200000
#define RDMA_CORE_CAP_PROT_IWARP 0x00400000
#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
+#define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000
+#define RDMA_CORE_CAP_PROT_USNIC 0x02000000
#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
| RDMA_CORE_CAP_IB_MAD \
#define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \
| RDMA_CORE_CAP_OPA_MAD)
+#define RDMA_CORE_PORT_RAW_PACKET (RDMA_CORE_CAP_PROT_RAW_PACKET)
+
+#define RDMA_CORE_PORT_USNIC (RDMA_CORE_CAP_PROT_USNIC)
+
struct ib_port_attr {
u64 subnet_prefix;
enum ib_port_state state;
IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
IB_QP_CREATE_USE_GFP_NOIO = 1 << 7,
IB_QP_CREATE_SCATTER_FCS = 1 << 8,
+ IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
} ext;
};
+enum ib_raw_packet_caps {
+ /* Strip cvlan from incoming packet and report it in the matching work
+ * completion is supported.
+ */
+ IB_RAW_PACKET_CAP_CVLAN_STRIPPING = (1 << 0),
+ /* Scatter FCS field of an incoming packet to host memory is supported.
+ */
+ IB_RAW_PACKET_CAP_SCATTER_FCS = (1 << 1),
+ /* Checksum offloads are supported (for both send and receive). */
+ IB_RAW_PACKET_CAP_IP_CSUM = (1 << 2),
+};
+
enum ib_wq_type {
IB_WQT_RQ
};
atomic_t usecnt;
};
+enum ib_wq_flags {
+ IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0,
+ IB_WQ_FLAGS_SCATTER_FCS = 1 << 1,
+};
+
struct ib_wq_init_attr {
void *wq_context;
enum ib_wq_type wq_type;
u32 max_sge;
struct ib_cq *cq;
void (*event_handler)(struct ib_event *, void *);
+ u32 create_flags; /* Use enum ib_wq_flags */
};
enum ib_wq_attr_mask {
- IB_WQ_STATE = 1 << 0,
- IB_WQ_CUR_STATE = 1 << 1,
+ IB_WQ_STATE = 1 << 0,
+ IB_WQ_CUR_STATE = 1 << 1,
+ IB_WQ_FLAGS = 1 << 2,
};
struct ib_wq_attr {
enum ib_wq_state wq_state;
enum ib_wq_state curr_wq_state;
+ u32 flags; /* Use enum ib_wq_flags */
+ u32 flags_mask; /* Use enum ib_wq_flags */
};
struct ib_rwq_ind_table {
IB_FLOW_SPEC_UDP = 0x41,
IB_FLOW_SPEC_VXLAN_TUNNEL = 0x50,
IB_FLOW_SPEC_INNER = 0x100,
+ /* Actions */
+ IB_FLOW_SPEC_ACTION_TAG = 0x1000,
};
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
#define IB_FLOW_SPEC_SUPPORT_LAYERS 8
struct ib_flow_tunnel_filter mask;
};
+struct ib_flow_spec_action_tag {
+ enum ib_flow_spec_type type;
+ u16 size;
+ u32 tag_id;
+};
+
union ib_flow_spec {
struct {
u32 type;
struct ib_flow_spec_tcp_udp tcp_udp;
struct ib_flow_spec_ipv6 ipv6;
struct ib_flow_spec_tunnel tunnel;
+ struct ib_flow_spec_action_tag flow_tag;
};
struct ib_flow_attr {
#define IB_DEVICE_NAME_MAX 64
+struct ib_port_cache {
+ struct ib_pkey_cache *pkey;
+ struct ib_gid_table *gid;
+ u8 lmc;
+ enum ib_port_state port_state;
+};
+
struct ib_cache {
rwlock_t lock;
struct ib_event_handler event_handler;
- struct ib_pkey_cache **pkey_cache;
- struct ib_gid_table **gid_cache;
- u8 *lmc_cache;
- enum ib_port_state *port_state_cache;
+ struct ib_port_cache *ports;
};
- struct ib_dma_mapping_ops {
- int (*mapping_error)(struct ib_device *dev,
- u64 dma_addr);
- u64 (*map_single)(struct ib_device *dev,
- void *ptr, size_t size,
- enum dma_data_direction direction);
- void (*unmap_single)(struct ib_device *dev,
- u64 addr, size_t size,
- enum dma_data_direction direction);
- u64 (*map_page)(struct ib_device *dev,
- struct page *page, unsigned long offset,
- size_t size,
- enum dma_data_direction direction);
- void (*unmap_page)(struct ib_device *dev,
- u64 addr, size_t size,
- enum dma_data_direction direction);
- int (*map_sg)(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction);
- void (*unmap_sg)(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction);
- int (*map_sg_attrs)(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long attrs);
- void (*unmap_sg_attrs)(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long attrs);
- void (*sync_single_for_cpu)(struct ib_device *dev,
- u64 dma_handle,
- size_t size,
- enum dma_data_direction dir);
- void (*sync_single_for_device)(struct ib_device *dev,
- u64 dma_handle,
- size_t size,
- enum dma_data_direction dir);
- void *(*alloc_coherent)(struct ib_device *dev,
- size_t size,
- u64 *dma_handle,
- gfp_t flag);
- void (*free_coherent)(struct ib_device *dev,
- size_t size, void *cpu_addr,
- u64 dma_handle);
- };
-
struct iw_cm_verbs;
struct ib_port_immutable {
};
struct ib_device {
- struct device *dma_device;
-
char name[IB_DEVICE_NAME_MAX];
struct list_head event_handler_list;
struct ib_rwq_ind_table_init_attr *init_attr,
struct ib_udata *udata);
int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
- struct ib_dma_mapping_ops *dma_ops;
struct module *owner;
struct device dev;
return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt;
}
+static inline int rdma_is_port_valid(const struct ib_device *device,
+ unsigned int port)
+{
+ return (port >= rdma_start_port(device) &&
+ port <= rdma_end_port(device));
+}
+
static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
{
return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
rdma_protocol_roce(device, port_num);
}
+static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET;
+}
+
+static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_USNIC;
+}
+
/**
* rdma_cap_ib_mad - Check if the port of a device supports Infiniband
* Management Datagrams.
*/
static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
{
- if (dev->dma_ops)
- return dev->dma_ops->mapping_error(dev, dma_addr);
- return dma_mapping_error(dev->dma_device, dma_addr);
+ return dma_mapping_error(&dev->dev, dma_addr);
}
/**
void *cpu_addr, size_t size,
enum dma_data_direction direction)
{
- if (dev->dma_ops)
- return dev->dma_ops->map_single(dev, cpu_addr, size, direction);
- return dma_map_single(dev->dma_device, cpu_addr, size, direction);
+ return dma_map_single(&dev->dev, cpu_addr, size, direction);
}
/**
u64 addr, size_t size,
enum dma_data_direction direction)
{
- if (dev->dma_ops)
- dev->dma_ops->unmap_single(dev, addr, size, direction);
- else
- dma_unmap_single(dev->dma_device, addr, size, direction);
- }
-
- static inline u64 ib_dma_map_single_attrs(struct ib_device *dev,
- void *cpu_addr, size_t size,
- enum dma_data_direction direction,
- unsigned long dma_attrs)
- {
- return dma_map_single_attrs(dev->dma_device, cpu_addr, size,
- direction, dma_attrs);
- }
-
- static inline void ib_dma_unmap_single_attrs(struct ib_device *dev,
- u64 addr, size_t size,
- enum dma_data_direction direction,
- unsigned long dma_attrs)
- {
- return dma_unmap_single_attrs(dev->dma_device, addr, size,
- direction, dma_attrs);
+ dma_unmap_single(&dev->dev, addr, size, direction);
}
/**
size_t size,
enum dma_data_direction direction)
{
- if (dev->dma_ops)
- return dev->dma_ops->map_page(dev, page, offset, size, direction);
- return dma_map_page(dev->dma_device, page, offset, size, direction);
+ return dma_map_page(&dev->dev, page, offset, size, direction);
}
/**
u64 addr, size_t size,
enum dma_data_direction direction)
{
- if (dev->dma_ops)
- dev->dma_ops->unmap_page(dev, addr, size, direction);
- else
- dma_unmap_page(dev->dma_device, addr, size, direction);
+ dma_unmap_page(&dev->dev, addr, size, direction);
}
/**
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- if (dev->dma_ops)
- return dev->dma_ops->map_sg(dev, sg, nents, direction);
- return dma_map_sg(dev->dma_device, sg, nents, direction);
+ return dma_map_sg(&dev->dev, sg, nents, direction);
}
/**
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- if (dev->dma_ops)
- dev->dma_ops->unmap_sg(dev, sg, nents, direction);
- else
- dma_unmap_sg(dev->dma_device, sg, nents, direction);
+ dma_unmap_sg(&dev->dev, sg, nents, direction);
}
static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
enum dma_data_direction direction,
unsigned long dma_attrs)
{
- if (dev->dma_ops)
- return dev->dma_ops->map_sg_attrs(dev, sg, nents, direction,
- dma_attrs);
- else
- return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
- dma_attrs);
+ return dma_map_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs);
}
static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
enum dma_data_direction direction,
unsigned long dma_attrs)
{
- if (dev->dma_ops)
- return dev->dma_ops->unmap_sg_attrs(dev, sg, nents, direction,
- dma_attrs);
- else
- dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
- dma_attrs);
+ dma_unmap_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs);
}
/**
* ib_sg_dma_address - Return the DMA address from a scatter/gather entry
size_t size,
enum dma_data_direction dir)
{
- if (dev->dma_ops)
- dev->dma_ops->sync_single_for_cpu(dev, addr, size, dir);
- else
- dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+ dma_sync_single_for_cpu(&dev->dev, addr, size, dir);
}
/**
size_t size,
enum dma_data_direction dir)
{
- if (dev->dma_ops)
- dev->dma_ops->sync_single_for_device(dev, addr, size, dir);
- else
- dma_sync_single_for_device(dev->dma_device, addr, size, dir);
+ dma_sync_single_for_device(&dev->dev, addr, size, dir);
}
/**
*/
static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
size_t size,
- u64 *dma_handle,
+ dma_addr_t *dma_handle,
gfp_t flag)
{
- if (dev->dma_ops)
- return dev->dma_ops->alloc_coherent(dev, size, dma_handle, flag);
- else {
- dma_addr_t handle;
- void *ret;
-
- ret = dma_alloc_coherent(dev->dma_device, size, &handle, flag);
- *dma_handle = handle;
- return ret;
- }
+ return dma_alloc_coherent(&dev->dev, size, dma_handle, flag);
}
/**
*/
static inline void ib_dma_free_coherent(struct ib_device *dev,
size_t size, void *cpu_addr,
- u64 dma_handle)
+ dma_addr_t dma_handle)
{
- if (dev->dma_ops)
- dev->dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
- else
- dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
+ dma_free_coherent(&dev->dev, size, cpu_addr, dma_handle);
}
/**
functions require M here.
config CRC32_SELFTEST
- bool "CRC32 perform self test on init"
- default n
+ tristate "CRC32 perform self test on init"
depends on CRC32
help
This option enables the CRC32 library functions to perform a
depends on !NO_DMA
default y
+ config DMA_NOOP_OPS
+ bool
+ depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
+ default n
+
+ config DMA_VIRT_OPS
+ bool
+ depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
+ default n
+
config CHECK_SIGNATURE
bool
depends on this.
config GLOB_SELFTEST
- bool "glob self-test on init"
- default n
+ tristate "glob self-test on init"
depends on GLOB
help
This option enables a simple self-test of the glob_match
config SBITMAP
bool
+config PARMAN
+ tristate
+
+config PRIME_NUMBERS
+ tristate
+
endmenu
sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
- earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
+ earlycpio.o seq_buf.o siphash.o \
+ nmi_backtrace.o nodemask.o win_minmax.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
- lib-$(CONFIG_HAS_DMA) += dma-noop.o
+ lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o
+ lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
lib-y += kobject.o klist.o
obj-y += lockref.o
-obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
+obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
bsearch.o find_bit.o llist.o memweight.o kfifo.o \
obj-y += kstrtox.o
obj-$(CONFIG_TEST_BPF) += test_bpf.o
obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
-obj-$(CONFIG_TEST_HASH) += test_hash.o
+obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
obj-$(CONFIG_TEST_KASAN) += test_kasan.o
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
obj-$(CONFIG_TEST_LKM) += test_module.o
obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
+obj-$(CONFIG_TEST_SORT) += test_sort.o
obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
obj-$(CONFIG_TEST_PRINTF) += test_printf.o
obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
obj-$(CONFIG_TEST_UUID) += test_uuid.o
+obj-$(CONFIG_TEST_PARMAN) += test_parman.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y)
CFLAGS_kobject.o += -DDEBUG
obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
obj-$(CONFIG_CRC_ITU_T) += crc-itu-t.o
obj-$(CONFIG_CRC32) += crc32.o
+obj-$(CONFIG_CRC32_SELFTEST) += crc32test.o
obj-$(CONFIG_CRC7) += crc7.o
obj-$(CONFIG_LIBCRC32C) += libcrc32c.o
obj-$(CONFIG_CRC8) += crc8.o
obj-$(CONFIG_DQL) += dynamic_queue_limits.o
obj-$(CONFIG_GLOB) += glob.o
+obj-$(CONFIG_GLOB_SELFTEST) += globtest.o
obj-$(CONFIG_MPILIB) += mpi/
obj-$(CONFIG_SIGNATURE) += digsig.o
obj-$(CONFIG_FONT_SUPPORT) += fonts/
+obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o
+
hostprogs-y := gen_crc32table
clean-files := crc32table.h
UBSAN_SANITIZE_ubsan.o := n
obj-$(CONFIG_SBITMAP) += sbitmap.o
+
+obj-$(CONFIG_PARMAN) += parman.o
#define RDS_IB_DEFAULT_RECV_WR 1024
#define RDS_IB_DEFAULT_SEND_WR 256
-#define RDS_IB_DEFAULT_FR_WR 512
+#define RDS_IB_DEFAULT_FR_WR 256
+#define RDS_IB_DEFAULT_FR_INV_WR 256
-#define RDS_IB_DEFAULT_RETRY_COUNT 2
+#define RDS_IB_DEFAULT_RETRY_COUNT 1
#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
/* To control the number of wrs from fastreg */
atomic_t i_fastreg_wrs;
+ atomic_t i_fastunreg_wrs;
/* interrupt handling */
struct tasklet_struct i_send_tasklet;
struct rds_ib_work_ring i_send_ring;
struct rm_data_op *i_data_op;
struct rds_header *i_send_hdrs;
- u64 i_send_hdrs_dma;
+ dma_addr_t i_send_hdrs_dma;
struct rds_ib_send_work *i_sends;
atomic_t i_signaled_sends;
struct rds_ib_incoming *i_ibinc;
u32 i_recv_data_rem;
struct rds_header *i_recv_hdrs;
- u64 i_recv_hdrs_dma;
+ dma_addr_t i_recv_hdrs_dma;
struct rds_ib_recv_work *i_recvs;
u64 i_ack_recv; /* last ACK received */
struct rds_ib_refill_cache i_cache_incs;
struct rds_ib_refill_cache i_cache_frags;
+ atomic_t i_cache_allocs;
/* sending acks */
unsigned long i_ack_flags;
struct rds_header *i_ack;
struct ib_send_wr i_ack_wr;
struct ib_sge i_ack_sge;
- u64 i_ack_dma;
+ dma_addr_t i_ack_dma;
unsigned long i_ack_queued;
/* Flow control related information
/* Batched completions */
unsigned int i_unsignaled_wrs;
+
+ /* Endpoint role in connection */
+ bool i_active_side;
+ atomic_t i_cq_quiesce;
+
+ /* Send/Recv vectors */
+ int i_scq_vector;
+ int i_rcq_vector;
};
/* This assumes that atomic_t is at least 32 bits */
spinlock_t spinlock; /* protect the above */
atomic_t refcount;
struct work_struct free_work;
+ int *vector_load;
};
- #define ibdev_to_node(ibdev) dev_to_node(ibdev->dma_device)
+ #define ibdev_to_node(ibdev) dev_to_node((ibdev)->dev.parent)
#define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)
/* bits for i_ack_flags */
uint64_t s_ib_rx_refill_from_cq;
uint64_t s_ib_rx_refill_from_thread;
uint64_t s_ib_rx_alloc_limit;
+ uint64_t s_ib_rx_total_frags;
+ uint64_t s_ib_rx_total_incs;
uint64_t s_ib_rx_credit_updates;
uint64_t s_ib_ack_sent;
uint64_t s_ib_ack_send_failure;
uint64_t s_ib_rdma_mr_1m_reused;
uint64_t s_ib_atomic_cswp;
uint64_t s_ib_atomic_fadd;
+ uint64_t s_ib_recv_added_to_cache;
+ uint64_t s_ib_recv_removed_from_cache;
};
extern struct workqueue_struct *rds_ib_wq;
/* ib_stats.c */
DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
#define rds_ib_stats_inc(member) rds_stats_inc_which(rds_ib_stats, member)
+#define rds_ib_stats_add(member, count) \
+ rds_stats_add_which(rds_ib_stats, member, count)
unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
unsigned int avail);