Merge tag 'for-next-dma_ops' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 25 Feb 2017 21:45:43 +0000 (13:45 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 25 Feb 2017 21:45:43 +0000 (13:45 -0800)
Pull rdma DMA mapping updates from Doug Ledford:
 "Drop IB DMA mapping code and use core DMA code instead.

  Bart Van Assche noted that the IB DMA mapping code was similar enough
  to the core DMA mapping code that, with a few changes, it was possible
  to remove the IB DMA mapping code entirely and switch the RDMA stack
  to the core DMA mapping code.

  This resulted in a nice set of cleanups, but touched the entire tree
  and has been kept separate for that reason."
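
Most of the per-driver commits below boil down to the same small change: instead of pointing ib_device.dma_device at the underlying DMA-capable device, a provider now only sets the embedded device's parent, and ib_register_device() inherits the DMA configuration from that parent. A condensed sketch of the pattern, lifted from the bnxt_re and drivers/infiniband/core/device.c hunks further down (the wrapper function names here are made up for illustration):

  #include <linux/pci.h>
  #include <rdma/ib_verbs.h>

  /* Provider side: record only the parent device
   * (was: ibdev->dma_device = &pdev->dev).
   */
  static void example_set_ibdev_parent(struct ib_device *ibdev, struct pci_dev *pdev)
  {
          ibdev->dev.parent = &pdev->dev;
  }

  /* Core side: ib_register_device() copies the DMA setup from the parent,
   * so the ib_dma_*() helpers can go through the regular DMA API on the
   * ib_device's embedded struct device.
   */
  static void example_inherit_dma_config(struct ib_device *device)
  {
          struct device *parent = device->dev.parent;

          if (!device->dev.dma_ops)
                  device->dev.dma_ops = parent->dma_ops;
          if (!device->dev.dma_mask)
                  device->dev.dma_mask = parent->dma_mask;
          if (!device->dev.coherent_dma_mask)
                  device->dev.coherent_dma_mask = parent->coherent_dma_mask;
  }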

* tag 'for-next-dma_ops' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (37 commits)
  IB/rxe, IB/rdmavt: Use dma_virt_ops instead of duplicating it
  IB/core: Remove ib_device.dma_device
  nvme-rdma: Switch from dma_device to dev.parent
  RDS: net: Switch from dma_device to dev.parent
  IB/srpt: Modify a debug statement
  IB/srp: Switch from dma_device to dev.parent
  IB/iser: Switch from dma_device to dev.parent
  IB/IPoIB: Switch from dma_device to dev.parent
  IB/rxe: Switch from dma_device to dev.parent
  IB/vmw_pvrdma: Switch from dma_device to dev.parent
  IB/usnic: Switch from dma_device to dev.parent
  IB/qib: Switch from dma_device to dev.parent
  IB/qedr: Switch from dma_device to dev.parent
  IB/ocrdma: Switch from dma_device to dev.parent
  IB/nes: Remove a superfluous assignment statement
  IB/mthca: Switch from dma_device to dev.parent
  IB/mlx5: Switch from dma_device to dev.parent
  IB/mlx4: Switch from dma_device to dev.parent
  IB/i40iw: Remove a superfluous assignment statement
  IB/hns: Switch from dma_device to dev.parent
  ...
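
The first entry in the list above covers the software devices: rxe and rdmavt used to carry their own copies of "virtual" DMA ops that simply hand back kernel virtual addresses, and those copies are replaced by a single shared dma_virt_ops implementation in lib/ (which is also why lib/Kconfig and lib/Makefile appear in the file list below). A minimal illustrative sketch of what such ops look like; this is not the exact lib/ code and the names are made up:

  #include <linux/dma-mapping.h>
  #include <linux/mm.h>

  /* Illustrative only: a software RDMA device never does real DMA, so
   * "mapping" a page is just returning its kernel virtual address.
   */
  static dma_addr_t example_virt_map_page(struct device *dev, struct page *page,
                                          unsigned long offset, size_t size,
                                          enum dma_data_direction dir,
                                          unsigned long attrs)
  {
          return (uintptr_t)(page_address(page) + offset);
  }

  static const struct dma_map_ops example_virt_dma_ops = {
          .map_page = example_virt_map_page,
  };

With dma_ops now living on struct device (see the include/linux/device.h change in the file list), a software driver can point dev.dma_ops at one shared table instead of duplicating these callbacks per driver.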

45 files changed:
arch/arm/mm/dma-mapping.c
arch/arm64/mm/dma-mapping.c
arch/m68k/kernel/dma.c
arch/mips/cavium-octeon/dma-octeon.c
arch/mips/loongson64/common/dma-swiotlb.c
arch/mips/mm/dma-default.c
arch/powerpc/kernel/pci-common.c
arch/s390/Kconfig
arch/s390/pci/pci.c
arch/x86/kernel/pci-calgary_64.c
arch/x86/kernel/pci-dma.c
arch/xtensa/kernel/pci-dma.c
drivers/infiniband/core/device.c
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hns/hns_roce_main.c
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/qedr/main.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/hw/usnic/usnic_ib_main.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
drivers/infiniband/sw/rdmavt/Makefile
drivers/infiniband/sw/rdmavt/mr.c
drivers/infiniband/sw/rdmavt/vt.c
drivers/infiniband/sw/rxe/rxe_verbs.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/srp/ib_srp.c
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/iommu/amd_iommu.c
drivers/nvme/host/rdma.c
include/linux/device.h
include/linux/dma-mapping.h
include/rdma/ib_verbs.h
lib/Kconfig
lib/Makefile
net/rds/ib.h

index 6ffdf17e0d5cf6a15ac816ca6317e42e3fee6b61,d26fe1a3568767136f16d61575154bc1d0084616..e309a5e2c9350788e6e651140d2c1efa03c7e667
@@@ -180,7 -180,7 +180,7 @@@ static void arm_dma_sync_single_for_dev
        __dma_page_cpu_to_dev(page, offset, size, dir);
  }
  
- struct dma_map_ops arm_dma_ops = {
+ const struct dma_map_ops arm_dma_ops = {
        .alloc                  = arm_dma_alloc,
        .free                   = arm_dma_free,
        .mmap                   = arm_dma_mmap,
@@@ -204,7 -204,7 +204,7 @@@ static int arm_coherent_dma_mmap(struc
                 void *cpu_addr, dma_addr_t dma_addr, size_t size,
                 unsigned long attrs);
  
- struct dma_map_ops arm_coherent_dma_ops = {
+ const struct dma_map_ops arm_coherent_dma_ops = {
        .alloc                  = arm_coherent_dma_alloc,
        .free                   = arm_coherent_dma_free,
        .mmap                   = arm_coherent_dma_mmap,
@@@ -349,7 -349,7 +349,7 @@@ static void __dma_free_buffer(struct pa
  static void *__alloc_from_contiguous(struct device *dev, size_t size,
                                     pgprot_t prot, struct page **ret_page,
                                     const void *caller, bool want_vaddr,
 -                                   int coherent_flag);
 +                                   int coherent_flag, gfp_t gfp);
  
  static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
                                 pgprot_t prot, struct page **ret_page,
@@@ -420,8 -420,7 +420,8 @@@ static int __init atomic_pool_init(void
         */
        if (dev_get_cma_area(NULL))
                ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
 -                                    &page, atomic_pool_init, true, NORMAL);
 +                                    &page, atomic_pool_init, true, NORMAL,
 +                                    GFP_KERNEL);
        else
                ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
                                           &page, atomic_pool_init, true);
@@@ -595,14 -594,14 +595,14 @@@ static int __free_from_pool(void *start
  static void *__alloc_from_contiguous(struct device *dev, size_t size,
                                     pgprot_t prot, struct page **ret_page,
                                     const void *caller, bool want_vaddr,
 -                                   int coherent_flag)
 +                                   int coherent_flag, gfp_t gfp)
  {
        unsigned long order = get_order(size);
        size_t count = size >> PAGE_SHIFT;
        struct page *page;
        void *ptr = NULL;
  
 -      page = dma_alloc_from_contiguous(dev, count, order);
 +      page = dma_alloc_from_contiguous(dev, count, order, gfp);
        if (!page)
                return NULL;
  
@@@ -656,7 -655,7 +656,7 @@@ static inline pgprot_t __get_dma_pgprot
  #define __get_dma_pgprot(attrs, prot)                         __pgprot(0)
  #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv)        NULL
  #define __alloc_from_pool(size, ret_page)                     NULL
 -#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag)   NULL
 +#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag, gfp)      NULL
  #define __free_from_pool(cpu_addr, size)                      do { } while (0)
  #define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0)
  #define __dma_free_remap(cpu_addr, size)                      do { } while (0)
@@@ -698,8 -697,7 +698,8 @@@ static void *cma_allocator_alloc(struc
  {
        return __alloc_from_contiguous(args->dev, args->size, args->prot,
                                       ret_page, args->caller,
 -                                     args->want_vaddr, args->coherent_flag);
 +                                     args->want_vaddr, args->coherent_flag,
 +                                     args->gfp);
  }
  
  static void cma_allocator_free(struct arm_dma_free_args *args)
@@@ -1069,7 -1067,7 +1069,7 @@@ static void __dma_page_dev_to_cpu(struc
  int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
                enum dma_data_direction dir, unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        struct scatterlist *s;
        int i, j;
  
  void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
                enum dma_data_direction dir, unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        struct scatterlist *s;
  
        int i;
  void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
                        int nents, enum dma_data_direction dir)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        struct scatterlist *s;
        int i;
  
  void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
                        int nents, enum dma_data_direction dir)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        struct scatterlist *s;
        int i;
  
@@@ -1173,25 -1171,6 +1173,25 @@@ core_initcall(dma_debug_do_init)
  
  #ifdef CONFIG_ARM_DMA_USE_IOMMU
  
 +static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs)
 +{
 +      int prot = 0;
 +
 +      if (attrs & DMA_ATTR_PRIVILEGED)
 +              prot |= IOMMU_PRIV;
 +
 +      switch (dir) {
 +      case DMA_BIDIRECTIONAL:
 +              return prot | IOMMU_READ | IOMMU_WRITE;
 +      case DMA_TO_DEVICE:
 +              return prot | IOMMU_READ;
 +      case DMA_FROM_DEVICE:
 +              return prot | IOMMU_WRITE;
 +      default:
 +              return prot;
 +      }
 +}
 +
  /* IOMMU */
  
  static int extend_iommu_mapping(struct dma_iommu_mapping *mapping);
@@@ -1314,7 -1293,7 +1314,7 @@@ static struct page **__iommu_alloc_buff
                unsigned long order = get_order(size);
                struct page *page;
  
 -              page = dma_alloc_from_contiguous(dev, count, order);
 +              page = dma_alloc_from_contiguous(dev, count, order, gfp);
                if (!page)
                        goto error;
  
@@@ -1415,8 -1394,7 +1415,8 @@@ __iommu_alloc_remap(struct page **pages
   * Create a mapping in device IO address space for specified pages
   */
  static dma_addr_t
 -__iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
 +__iommu_create_mapping(struct device *dev, struct page **pages, size_t size,
 +                     unsigned long attrs)
  {
        struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
        unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
  
                len = (j - i) << PAGE_SHIFT;
                ret = iommu_map(mapping->domain, iova, phys, len,
 -                              IOMMU_READ|IOMMU_WRITE);
 +                              __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs));
                if (ret < 0)
                        goto fail;
                iova += len;
@@@ -1498,8 -1476,7 +1498,8 @@@ static struct page **__iommu_get_pages(
  }
  
  static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp,
 -                                dma_addr_t *handle, int coherent_flag)
 +                                dma_addr_t *handle, int coherent_flag,
 +                                unsigned long attrs)
  {
        struct page *page;
        void *addr;
        if (!addr)
                return NULL;
  
 -      *handle = __iommu_create_mapping(dev, &page, size);
 +      *handle = __iommu_create_mapping(dev, &page, size, attrs);
        if (*handle == DMA_ERROR_CODE)
                goto err_mapping;
  
@@@ -1545,7 -1522,7 +1545,7 @@@ static void *__arm_iommu_alloc_attrs(st
  
        if (coherent_flag  == COHERENT || !gfpflags_allow_blocking(gfp))
                return __iommu_alloc_simple(dev, size, gfp, handle,
 -                                          coherent_flag);
 +                                          coherent_flag, attrs);
  
        /*
         * Following is a work-around (a.k.a. hack) to prevent pages
        if (!pages)
                return NULL;
  
 -      *handle = __iommu_create_mapping(dev, pages, size);
 +      *handle = __iommu_create_mapping(dev, pages, size, attrs);
        if (*handle == DMA_ERROR_CODE)
                goto err_buffer;
  
@@@ -1695,6 -1672,27 +1695,6 @@@ static int arm_iommu_get_sgtable(struc
                                         GFP_KERNEL);
  }
  
 -static int __dma_direction_to_prot(enum dma_data_direction dir)
 -{
 -      int prot;
 -
 -      switch (dir) {
 -      case DMA_BIDIRECTIONAL:
 -              prot = IOMMU_READ | IOMMU_WRITE;
 -              break;
 -      case DMA_TO_DEVICE:
 -              prot = IOMMU_READ;
 -              break;
 -      case DMA_FROM_DEVICE:
 -              prot = IOMMU_WRITE;
 -              break;
 -      default:
 -              prot = 0;
 -      }
 -
 -      return prot;
 -}
 -
  /*
   * Map a part of the scatter-gather list into contiguous io address space
   */
@@@ -1724,7 -1722,7 +1724,7 @@@ static int __map_sg_chunk(struct devic
                if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
                        __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
  
 -              prot = __dma_direction_to_prot(dir);
 +              prot = __dma_info_to_prot(dir, attrs);
  
                ret = iommu_map(mapping->domain, iova, phys, len, prot);
                if (ret < 0)
@@@ -1932,7 -1930,7 +1932,7 @@@ static dma_addr_t arm_coherent_iommu_ma
        if (dma_addr == DMA_ERROR_CODE)
                return dma_addr;
  
 -      prot = __dma_direction_to_prot(dir);
 +      prot = __dma_info_to_prot(dir, attrs);
  
        ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot);
        if (ret < 0)
@@@ -2038,7 -2036,7 +2038,7 @@@ static dma_addr_t arm_iommu_map_resourc
        if (dma_addr == DMA_ERROR_CODE)
                return dma_addr;
  
 -      prot = __dma_direction_to_prot(dir) | IOMMU_MMIO;
 +      prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO;
  
        ret = iommu_map(mapping->domain, dma_addr, addr, len, prot);
        if (ret < 0)
@@@ -2101,7 -2099,7 +2101,7 @@@ static void arm_iommu_sync_single_for_d
        __dma_page_cpu_to_dev(page, offset, size, dir);
  }
  
- struct dma_map_ops iommu_ops = {
+ const struct dma_map_ops iommu_ops = {
        .alloc          = arm_iommu_alloc_attrs,
        .free           = arm_iommu_free_attrs,
        .mmap           = arm_iommu_mmap_attrs,
        .unmap_resource         = arm_iommu_unmap_resource,
  };
  
- struct dma_map_ops iommu_coherent_ops = {
+ const struct dma_map_ops iommu_coherent_ops = {
        .alloc          = arm_coherent_iommu_alloc_attrs,
        .free           = arm_coherent_iommu_free_attrs,
        .mmap           = arm_coherent_iommu_mmap_attrs,
@@@ -2321,7 -2319,7 +2321,7 @@@ void arm_iommu_detach_device(struct dev
  }
  EXPORT_SYMBOL_GPL(arm_iommu_detach_device);
  
- static struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
+ static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
  {
        return coherent ? &iommu_coherent_ops : &iommu_ops;
  }
@@@ -2376,7 -2374,7 +2376,7 @@@ static void arm_teardown_iommu_dma_ops(
  
  #endif        /* CONFIG_ARM_DMA_USE_IOMMU */
  
- static struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
+ static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
  {
        return coherent ? &arm_coherent_dma_ops : &arm_dma_ops;
  }
  void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
                        const struct iommu_ops *iommu, bool coherent)
  {
-       struct dma_map_ops *dma_ops;
+       const struct dma_map_ops *dma_ops;
  
        dev->archdata.dma_coherent = coherent;
        if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu))
index aff1d0afeb1eaded6f85bf8398b2e793cc104524,dbab4c6c084b39ecdfd767d3342d6848c21f58d1..81cdb2e844ed9fe80e4192315647ccc9245b8003
@@@ -107,7 -107,7 +107,7 @@@ static void *__dma_alloc_coherent(struc
                void *addr;
  
                page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
 -                                                      get_order(size));
 +                                               get_order(size), flags);
                if (!page)
                        return NULL;
  
@@@ -211,8 -211,7 +211,8 @@@ static dma_addr_t __swiotlb_map_page(st
        dma_addr_t dev_addr;
  
        dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
 -      if (!is_device_dma_coherent(dev))
 +      if (!is_device_dma_coherent(dev) &&
 +          (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
                __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
  
        return dev_addr;
@@@ -223,8 -222,7 +223,8 @@@ static void __swiotlb_unmap_page(struc
                                 size_t size, enum dma_data_direction dir,
                                 unsigned long attrs)
  {
 -      if (!is_device_dma_coherent(dev))
 +      if (!is_device_dma_coherent(dev) &&
 +          (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
                __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
        swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
  }
@@@ -237,8 -235,7 +237,8 @@@ static int __swiotlb_map_sg_attrs(struc
        int i, ret;
  
        ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
 -      if (!is_device_dma_coherent(dev))
 +      if (!is_device_dma_coherent(dev) &&
 +          (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
                for_each_sg(sgl, sg, ret, i)
                        __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
                                       sg->length, dir);
@@@ -254,8 -251,7 +254,8 @@@ static void __swiotlb_unmap_sg_attrs(st
        struct scatterlist *sg;
        int i;
  
 -      if (!is_device_dma_coherent(dev))
 +      if (!is_device_dma_coherent(dev) &&
 +          (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
                for_each_sg(sgl, sg, nelems, i)
                        __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
                                         sg->length, dir);
@@@ -356,14 -352,7 +356,14 @@@ static int __swiotlb_dma_supported(stru
        return 1;
  }
  
- static struct dma_map_ops swiotlb_dma_ops = {
 +static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr)
 +{
 +      if (swiotlb)
 +              return swiotlb_dma_mapping_error(hwdev, addr);
 +      return 0;
 +}
 +
+ static const struct dma_map_ops swiotlb_dma_ops = {
        .alloc = __dma_alloc,
        .free = __dma_free,
        .mmap = __swiotlb_mmap,
        .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
        .sync_sg_for_device = __swiotlb_sync_sg_for_device,
        .dma_supported = __swiotlb_dma_supported,
 -      .mapping_error = swiotlb_dma_mapping_error,
 +      .mapping_error = __swiotlb_dma_mapping_error,
  };
  
  static int __init atomic_pool_init(void)
  
        if (dev_get_cma_area(NULL))
                page = dma_alloc_from_contiguous(NULL, nr_pages,
 -                                                      pool_size_order);
 +                                               pool_size_order, GFP_KERNEL);
        else
                page = alloc_pages(GFP_DMA, pool_size_order);
  
@@@ -516,7 -505,7 +516,7 @@@ static int __dummy_dma_supported(struc
        return 0;
  }
  
- struct dma_map_ops dummy_dma_ops = {
+ const struct dma_map_ops dummy_dma_ops = {
        .alloc                  = __dummy_alloc,
        .free                   = __dummy_free,
        .mmap                   = __dummy_mmap,
@@@ -569,7 -558,7 +569,7 @@@ static void *__iommu_alloc_attrs(struc
                                 unsigned long attrs)
  {
        bool coherent = is_device_dma_coherent(dev);
 -      int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
 +      int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
        size_t iosize = size;
        void *addr;
  
@@@ -723,7 -712,7 +723,7 @@@ static dma_addr_t __iommu_map_page(stru
                                   unsigned long attrs)
  {
        bool coherent = is_device_dma_coherent(dev);
 -      int prot = dma_direction_to_prot(dir, coherent);
 +      int prot = dma_info_to_prot(dir, coherent, attrs);
        dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
  
        if (!iommu_dma_mapping_error(dev, dev_addr) &&
@@@ -781,7 -770,7 +781,7 @@@ static int __iommu_map_sg_attrs(struct 
                __iommu_sync_sg_for_device(dev, sgl, nelems, dir);
  
        return iommu_dma_map_sg(dev, sgl, nelems,
 -                      dma_direction_to_prot(dir, coherent));
 +                              dma_info_to_prot(dir, coherent, attrs));
  }
  
  static void __iommu_unmap_sg_attrs(struct device *dev,
        iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
  }
  
- static struct dma_map_ops iommu_dma_ops = {
+ static const struct dma_map_ops iommu_dma_ops = {
        .alloc = __iommu_alloc_attrs,
        .free = __iommu_free_attrs,
        .mmap = __iommu_mmap_attrs,
        .sync_sg_for_device = __iommu_sync_sg_for_device,
        .map_resource = iommu_dma_map_resource,
        .unmap_resource = iommu_dma_unmap_resource,
 -      .dma_supported = iommu_dma_supported,
        .mapping_error = iommu_dma_mapping_error,
  };
  
@@@ -841,21 -831,14 +841,21 @@@ static bool do_iommu_attach(struct devi
         * then the IOMMU core will have already configured a group for this
         * device, and allocated the default domain for that group.
         */
 -      if (!domain || iommu_dma_init_domain(domain, dma_base, size, dev)) {
 -              pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
 -                      dev_name(dev));
 -              return false;
 +      if (!domain)
 +              goto out_err;
 +
 +      if (domain->type == IOMMU_DOMAIN_DMA) {
 +              if (iommu_dma_init_domain(domain, dma_base, size, dev))
 +                      goto out_err;
 +
-               dev->archdata.dma_ops = &iommu_dma_ops;
++              dev->dma_ops = &iommu_dma_ops;
        }
  
 -      dev->dma_ops = &iommu_dma_ops;
        return true;
 +out_err:
 +      pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
 +               dev_name(dev));
 +      return false;
  }
  
  static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
@@@ -958,7 -941,7 +958,7 @@@ static void __iommu_setup_dma_ops(struc
  
  void arch_teardown_dma_ops(struct device *dev)
  {
-       dev->archdata.dma_ops = NULL;
+       dev->dma_ops = NULL;
  }
  
  #else
@@@ -972,8 -955,8 +972,8 @@@ static void __iommu_setup_dma_ops(struc
  void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
                        const struct iommu_ops *iommu, bool coherent)
  {
-       if (!dev->archdata.dma_ops)
-               dev->archdata.dma_ops = &swiotlb_dma_ops;
+       if (!dev->dma_ops)
+               dev->dma_ops = &swiotlb_dma_ops;
  
        dev->archdata.dma_coherent = coherent;
        __iommu_setup_dma_ops(dev, dma_base, size, iommu);
diff --combined arch/m68k/kernel/dma.c
index 1e4f386ba31e22bfe96182b7253b620504360077,0fc5dabb4a42ef388bc757260eaecfd2e58242ea..87ef73a9385672ba6af2b8af42fda3043e3d4eba
@@@ -110,8 -110,8 +110,8 @@@ static void m68k_dma_sync_single_for_de
                cache_clear(handle, size);
                break;
        default:
 -              if (printk_ratelimit())
 -                      printk("dma_sync_single_for_device: unsupported dir %u\n", dir);
 +              pr_err_ratelimited("dma_sync_single_for_device: unsupported dir %u\n",
 +                                 dir);
                break;
        }
  }
@@@ -158,7 -158,7 +158,7 @@@ static int m68k_dma_map_sg(struct devic
        return nents;
  }
  
- struct dma_map_ops m68k_dma_ops = {
+ const struct dma_map_ops m68k_dma_ops = {
        .alloc                  = m68k_dma_alloc,
        .free                   = m68k_dma_free,
        .map_page               = m68k_dma_map_page,
index 1226965e1e4f7527563450523f2be053265f377e,897d32c888ee2ee956352be1f6af82297266be83..c64bd87f0b6ef06b5538459f32023cba29015289
@@@ -164,14 -164,19 +164,14 @@@ static void *octeon_dma_alloc_coherent(
        /* ignore region specifiers */
        gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
  
 -#ifdef CONFIG_ZONE_DMA
 -      if (dev == NULL)
 +      if (IS_ENABLED(CONFIG_ZONE_DMA) && dev == NULL)
                gfp |= __GFP_DMA;
 -      else if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
 +      else if (IS_ENABLED(CONFIG_ZONE_DMA) &&
 +               dev->coherent_dma_mask <= DMA_BIT_MASK(24))
                gfp |= __GFP_DMA;
 -      else
 -#endif
 -#ifdef CONFIG_ZONE_DMA32
 -           if (dev->coherent_dma_mask <= DMA_BIT_MASK(32))
 +      else if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
 +               dev->coherent_dma_mask <= DMA_BIT_MASK(32))
                gfp |= __GFP_DMA32;
 -      else
 -#endif
 -              ;
  
        /* Don't invoke OOM killer */
        gfp |= __GFP_NORETRY;
@@@ -200,7 -205,7 +200,7 @@@ static phys_addr_t octeon_unity_dma_to_
  }
  
  struct octeon_dma_map_ops {
-       struct dma_map_ops dma_map_ops;
+       const struct dma_map_ops dma_map_ops;
        dma_addr_t (*phys_to_dma)(struct device *dev, phys_addr_t paddr);
        phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr);
  };
@@@ -328,7 -333,7 +328,7 @@@ static struct octeon_dma_map_ops _octeo
        },
  };
  
- struct dma_map_ops *octeon_pci_dma_map_ops;
+ const struct dma_map_ops *octeon_pci_dma_map_ops;
  
  void __init octeon_pci_dma_init(void)
  {
index df7235e334997a2398b4ddf6af96347a5d2be764,7296df043d9208fbf96e7ffe0c036ca2f1843dfd..178ca17a5667ee4d8584bfc585d69d3e88c7a6ba
@@@ -17,14 -17,22 +17,14 @@@ static void *loongson_dma_alloc_coheren
        /* ignore region specifiers */
        gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
  
 -#ifdef CONFIG_ISA
 -      if (dev == NULL)
 +      if ((IS_ENABLED(CONFIG_ISA) && dev == NULL) ||
 +          (IS_ENABLED(CONFIG_ZONE_DMA) &&
 +           dev->coherent_dma_mask < DMA_BIT_MASK(32)))
                gfp |= __GFP_DMA;
 -      else
 -#endif
 -#ifdef CONFIG_ZONE_DMA
 -      if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
 -              gfp |= __GFP_DMA;
 -      else
 -#endif
 -#ifdef CONFIG_ZONE_DMA32
 -      if (dev->coherent_dma_mask < DMA_BIT_MASK(40))
 +      else if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
 +               dev->coherent_dma_mask < DMA_BIT_MASK(40))
                gfp |= __GFP_DMA32;
 -      else
 -#endif
 -      ;
 +
        gfp |= __GFP_NORETRY;
  
        ret = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
@@@ -114,7 -122,7 +114,7 @@@ phys_addr_t dma_to_phys(struct device *
        return daddr;
  }
  
- static struct dma_map_ops loongson_dma_map_ops = {
+ static const struct dma_map_ops loongson_dma_map_ops = {
        .alloc = loongson_dma_alloc_coherent,
        .free = loongson_dma_free_coherent,
        .map_page = loongson_dma_map_page,
index 1895a692efd4b0d80ebdaf8ab51d00662b9ec71c,1cb84472cb582977db32ac4b9a8341853366a495..fe8df14b616984a702416dcdb442e5a2c2f3761d
@@@ -148,8 -148,8 +148,8 @@@ static void *mips_dma_alloc_coherent(st
        gfp = massage_gfp_flags(dev, gfp);
  
        if (IS_ENABLED(CONFIG_DMA_CMA) && gfpflags_allow_blocking(gfp))
 -              page = dma_alloc_from_contiguous(dev,
 -                                      count, get_order(size));
 +              page = dma_alloc_from_contiguous(dev, count, get_order(size),
 +                                               gfp);
        if (!page)
                page = alloc_pages(gfp, get_order(size));
  
@@@ -417,7 -417,7 +417,7 @@@ void dma_cache_sync(struct device *dev
  
  EXPORT_SYMBOL(dma_cache_sync);
  
- static struct dma_map_ops mips_default_dma_map_ops = {
+ static const struct dma_map_ops mips_default_dma_map_ops = {
        .alloc = mips_dma_alloc_coherent,
        .free = mips_dma_free_coherent,
        .mmap = mips_dma_mmap,
        .dma_supported = mips_dma_supported
  };
  
- struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops;
+ const struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops;
  EXPORT_SYMBOL(mips_dma_map_ops);
  
  #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
index a3f5334f5d8cfa46da9f0cbfa6b976dd4ed77950,09db4778435c3d33f264af092952792db8a3b8a1..8e6fde8d28f362649f61b859bcf99d20ec06a916
@@@ -25,7 -25,6 +25,7 @@@
  #include <linux/of_address.h>
  #include <linux/of_pci.h>
  #include <linux/mm.h>
 +#include <linux/shmem_fs.h>
  #include <linux/list.h>
  #include <linux/syscalls.h>
  #include <linux/irq.h>
@@@ -60,14 -59,14 +60,14 @@@ resource_size_t isa_mem_base
  EXPORT_SYMBOL(isa_mem_base);
  
  
- static struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
+ static const struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
  
- void set_pci_dma_ops(struct dma_map_ops *dma_ops)
+ void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
  {
        pci_dma_ops = dma_ops;
  }
  
- struct dma_map_ops *get_pci_dma_ops(void)
+ const struct dma_map_ops *get_pci_dma_ops(void)
  {
        return pci_dma_ops;
  }
diff --combined arch/s390/Kconfig
index 2ef031bee7ab38324c2a426d2d7719d227d7dfc1,38ad42358c41edca78d251005ea1e2015fcc138e..d5c1073a2584cc383765a97048ec6e872e4546f1
@@@ -62,6 -62,9 +62,6 @@@ config PCI_QUIRK
  config ARCH_SUPPORTS_UPROBES
        def_bool y
  
 -config DEBUG_RODATA
 -      def_bool y
 -
  config S390
        def_bool y
        select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_GIGANTIC_PAGE
        select ARCH_HAS_KCOV
 +      select ARCH_HAS_SET_MEMORY
        select ARCH_HAS_SG_CHAIN
 +      select ARCH_HAS_STRICT_KERNEL_RWX
 +      select ARCH_HAS_STRICT_MODULE_RWX
        select ARCH_HAS_UBSAN_SANITIZE_ALL
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
        select ARCH_INLINE_READ_LOCK
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DMA_API_DEBUG
        select HAVE_DMA_CONTIGUOUS
+       select DMA_NOOP_OPS
        select HAVE_DYNAMIC_FTRACE
        select HAVE_DYNAMIC_FTRACE_WITH_REGS
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
diff --combined arch/s390/pci/pci.c
index 4c0fa9b3b2a003cf4acf191631432f54001c6901,82abef8b8574eee49c5a64f601fb13b2bbcf3d32..364b9d824be30bc7825bae3acce72231d8d1bbe1
@@@ -224,8 -224,8 +224,8 @@@ static int zpci_cfg_load(struct zpci_de
  
        rc = zpci_load(&data, req, offset);
        if (!rc) {
 -              data = data << ((8 - len) * 8);
 -              data = le64_to_cpu(data);
 +              data = le64_to_cpu((__force __le64) data);
 +              data >>= (8 - len) * 8;
                *val = (u32) data;
        } else
                *val = 0xffffffff;
@@@ -238,8 -238,8 +238,8 @@@ static int zpci_cfg_store(struct zpci_d
        u64 data = val;
        int rc;
  
 -      data = cpu_to_le64(data);
 -      data = data >> ((8 - len) * 8);
 +      data <<= (8 - len) * 8;
 +      data = (__force u64) cpu_to_le64(data);
        rc = zpci_store(data, req, offset);
        return rc;
  }
@@@ -641,7 -641,7 +641,7 @@@ int pcibios_add_device(struct pci_dev *
        int i;
  
        pdev->dev.groups = zpci_attr_groups;
-       pdev->dev.archdata.dma_ops = &s390_pci_dma_ops;
+       pdev->dev.dma_ops = &s390_pci_dma_ops;
        zpci_map_resources(pdev);
  
        for (i = 0; i < PCI_BAR_COUNT; i++) {
index d47517941bbc03ee288848561c54b7f791a97e76,5070320780c687b0ebd4c0d4af6204448305aa61..0c150c06fa5a5642a6630cfeed2b6a74a1acab17
@@@ -296,7 -296,7 +296,7 @@@ static void iommu_free(struct iommu_tab
  
        /* were we called with bad_dma_address? */
        badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE);
 -      if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) {
 +      if (unlikely(dma_addr < badend)) {
                WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
                       "address 0x%Lx\n", dma_addr);
                return;
@@@ -478,7 -478,7 +478,7 @@@ static void calgary_free_coherent(struc
        free_pages((unsigned long)vaddr, get_order(size));
  }
  
- static struct dma_map_ops calgary_dma_ops = {
+ static const struct dma_map_ops calgary_dma_ops = {
        .alloc = calgary_alloc_coherent,
        .free = calgary_free_coherent,
        .map_sg = calgary_map_sg,
@@@ -1177,7 -1177,7 +1177,7 @@@ static int __init calgary_init(void
                tbl = find_iommu_table(&dev->dev);
  
                if (translation_enabled(tbl))
-                       dev->dev.archdata.dma_ops = &calgary_dma_ops;
+                       dev->dev.dma_ops = &calgary_dma_ops;
        }
  
        return ret;
@@@ -1201,7 -1201,7 +1201,7 @@@ error
                calgary_disable_translation(dev);
                calgary_free_bus(dev);
                pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
-               dev->dev.archdata.dma_ops = NULL;
+               dev->dev.dma_ops = NULL;
        } while (1);
  
        return ret;
index d5c223c9cf1117e4cd522a4774d0b0f05ac4cf08,76f4c039baaee1aabdfd3153e6d7615d5caaf263..3a216ec869cdbafc00ec89ab88fb412b0bb3f462
@@@ -17,7 -17,7 +17,7 @@@
  
  static int forbid_dac __read_mostly;
  
- struct dma_map_ops *dma_ops = &nommu_dma_ops;
+ const struct dma_map_ops *dma_ops = &nommu_dma_ops;
  EXPORT_SYMBOL(dma_ops);
  
  static int iommu_sac_force __read_mostly;
@@@ -91,8 -91,7 +91,8 @@@ again
        page = NULL;
        /* CMA can be used only in the context which permits sleeping */
        if (gfpflags_allow_blocking(flag)) {
 -              page = dma_alloc_from_contiguous(dev, count, get_order(size));
 +              page = dma_alloc_from_contiguous(dev, count, get_order(size),
 +                                               flag);
                if (page && page_to_phys(page) + size > dma_mask) {
                        dma_release_from_contiguous(dev, page, count);
                        page = NULL;
@@@ -215,7 -214,7 +215,7 @@@ early_param("iommu", iommu_setup)
  
  int dma_supported(struct device *dev, u64 mask)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
  
  #ifdef CONFIG_PCI
        if (mask > 0xffffffff && forbid_dac > 0) {
index 34c1f9fa6acc9e13129b37ec4be664def8540cac,ecec5265a66d603468809ad04c55f6baaa73786a..cec86a1c2acce6845238da7ef1994e1e1ef1a6f0
@@@ -158,8 -158,7 +158,8 @@@ static void *xtensa_dma_alloc(struct de
                flag |= GFP_DMA;
  
        if (gfpflags_allow_blocking(flag))
 -              page = dma_alloc_from_contiguous(dev, count, get_order(size));
 +              page = dma_alloc_from_contiguous(dev, count, get_order(size),
 +                                               flag);
  
        if (!page)
                page = alloc_pages(flag, get_order(size));
@@@ -250,7 -249,7 +250,7 @@@ int xtensa_dma_mapping_error(struct dev
        return 0;
  }
  
- struct dma_map_ops xtensa_dma_map_ops = {
+ const struct dma_map_ops xtensa_dma_map_ops = {
        .alloc = xtensa_dma_alloc,
        .free = xtensa_dma_free,
        .map_page = xtensa_map_page,
index f2e48655a906bbe16d0923905288e8fbf6af6e35,cac1518de36e29ff6276e8eb7e7860f1e459d571..a63e8400ea3b0c9e964b54140edd4f1fd758b47d
@@@ -333,6 -333,15 +333,15 @@@ int ib_register_device(struct ib_devic
        int ret;
        struct ib_client *client;
        struct ib_udata uhw = {.outlen = 0, .inlen = 0};
+       struct device *parent = device->dev.parent;
+       WARN_ON_ONCE(!parent);
+       if (!device->dev.dma_ops)
+               device->dev.dma_ops = parent->dma_ops;
+       if (!device->dev.dma_mask)
+               device->dev.dma_mask = parent->dma_mask;
+       if (!device->dev.coherent_dma_mask)
+               device->dev.coherent_dma_mask = parent->coherent_dma_mask;
  
        mutex_lock(&device_mutex);
  
@@@ -659,7 -668,7 +668,7 @@@ int ib_query_port(struct ib_device *dev
        union ib_gid gid;
        int err;
  
 -      if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
 +      if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;
  
        memset(port_attr, 0, sizeof(*port_attr));
@@@ -825,7 -834,7 +834,7 @@@ int ib_modify_port(struct ib_device *de
        if (!device->modify_port)
                return -ENOSYS;
  
 -      if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
 +      if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;
  
        return device->modify_port(device, port_num, port_modify_mask,
index bd452a92b386dfe4bcde2dd64c3fd97b0a0f3081,0000000000000000000000000000000000000000..5d355401179b8ae5107e411421c673db86164696
mode 100644,000000..100644
--- /dev/null
@@@ -1,1315 -1,0 +1,1315 @@@
-       ibdev->dma_device = &rdev->en_dev->pdev->dev;
 +/*
 + * Broadcom NetXtreme-E RoCE driver.
 + *
 + * Copyright (c) 2016 - 2017, Broadcom. All rights reserved.  The term
 + * Broadcom refers to Broadcom Limited and/or its subsidiaries.
 + *
 + * This software is available to you under a choice of one of two
 + * licenses.  You may choose to be licensed under the terms of the GNU
 + * General Public License (GPL) Version 2, available from the file
 + * COPYING in the main directory of this source tree, or the
 + * BSD license below:
 + *
 + * Redistribution and use in source and binary forms, with or without
 + * modification, are permitted provided that the following conditions
 + * are met:
 + *
 + * 1. Redistributions of source code must retain the above copyright
 + *    notice, this list of conditions and the following disclaimer.
 + * 2. Redistributions in binary form must reproduce the above copyright
 + *    notice, this list of conditions and the following disclaimer in
 + *    the documentation and/or other materials provided with the
 + *    distribution.
 + *
 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 + *
 + * Description: Main component of the bnxt_re driver
 + */
 +
 +#include <linux/module.h>
 +#include <linux/netdevice.h>
 +#include <linux/ethtool.h>
 +#include <linux/mutex.h>
 +#include <linux/list.h>
 +#include <linux/rculist.h>
 +#include <linux/spinlock.h>
 +#include <linux/pci.h>
 +#include <net/dcbnl.h>
 +#include <net/ipv6.h>
 +#include <net/addrconf.h>
 +#include <linux/if_ether.h>
 +
 +#include <rdma/ib_verbs.h>
 +#include <rdma/ib_user_verbs.h>
 +#include <rdma/ib_umem.h>
 +#include <rdma/ib_addr.h>
 +
 +#include "bnxt_ulp.h"
 +#include "roce_hsi.h"
 +#include "qplib_res.h"
 +#include "qplib_sp.h"
 +#include "qplib_fp.h"
 +#include "qplib_rcfw.h"
 +#include "bnxt_re.h"
 +#include "ib_verbs.h"
 +#include <rdma/bnxt_re-abi.h>
 +#include "bnxt.h"
 +static char version[] =
 +              BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
 +
 +MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
 +MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
 +MODULE_LICENSE("Dual BSD/GPL");
 +MODULE_VERSION(ROCE_DRV_MODULE_VERSION);
 +
 +/* globals */
 +static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
 +/* Mutex to protect the list of bnxt_re devices added */
 +static DEFINE_MUTEX(bnxt_re_dev_lock);
 +static struct workqueue_struct *bnxt_re_wq;
 +
 +/* for handling bnxt_en callbacks later */
 +static void bnxt_re_stop(void *p)
 +{
 +}
 +
 +static void bnxt_re_start(void *p)
 +{
 +}
 +
 +static void bnxt_re_sriov_config(void *p, int num_vfs)
 +{
 +}
 +
 +static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
 +      .ulp_async_notifier = NULL,
 +      .ulp_stop = bnxt_re_stop,
 +      .ulp_start = bnxt_re_start,
 +      .ulp_sriov_config = bnxt_re_sriov_config
 +};
 +
 +/* RoCE -> Net driver */
 +
 +/* Driver registration routines used to let the networking driver (bnxt_en)
 + * to know that the RoCE driver is now installed
 + */
 +static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait)
 +{
 +      struct bnxt_en_dev *en_dev;
 +      int rc;
 +
 +      if (!rdev)
 +              return -EINVAL;
 +
 +      en_dev = rdev->en_dev;
 +      /* Acquire rtnl lock if it is not invokded from netdev event */
 +      if (lock_wait)
 +              rtnl_lock();
 +
 +      rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev,
 +                                                  BNXT_ROCE_ULP);
 +      if (lock_wait)
 +              rtnl_unlock();
 +      return rc;
 +}
 +
 +static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
 +{
 +      struct bnxt_en_dev *en_dev;
 +      int rc = 0;
 +
 +      if (!rdev)
 +              return -EINVAL;
 +
 +      en_dev = rdev->en_dev;
 +
 +      rtnl_lock();
 +      rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP,
 +                                                &bnxt_re_ulp_ops, rdev);
 +      rtnl_unlock();
 +      return rc;
 +}
 +
 +static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait)
 +{
 +      struct bnxt_en_dev *en_dev;
 +      int rc;
 +
 +      if (!rdev)
 +              return -EINVAL;
 +
 +      en_dev = rdev->en_dev;
 +
 +      if (lock_wait)
 +              rtnl_lock();
 +
 +      rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP);
 +
 +      if (lock_wait)
 +              rtnl_unlock();
 +      return rc;
 +}
 +
 +static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
 +{
 +      int rc = 0, num_msix_want = BNXT_RE_MIN_MSIX, num_msix_got;
 +      struct bnxt_en_dev *en_dev;
 +
 +      if (!rdev)
 +              return -EINVAL;
 +
 +      en_dev = rdev->en_dev;
 +
 +      rtnl_lock();
 +      num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP,
 +                                                       rdev->msix_entries,
 +                                                       num_msix_want);
 +      if (num_msix_got < BNXT_RE_MIN_MSIX) {
 +              rc = -EINVAL;
 +              goto done;
 +      }
 +      if (num_msix_got != num_msix_want) {
 +              dev_warn(rdev_to_dev(rdev),
 +                       "Requested %d MSI-X vectors, got %d\n",
 +                       num_msix_want, num_msix_got);
 +      }
 +      rdev->num_msix = num_msix_got;
 +done:
 +      rtnl_unlock();
 +      return rc;
 +}
 +
 +static void bnxt_re_init_hwrm_hdr(struct bnxt_re_dev *rdev, struct input *hdr,
 +                                u16 opcd, u16 crid, u16 trid)
 +{
 +      hdr->req_type = cpu_to_le16(opcd);
 +      hdr->cmpl_ring = cpu_to_le16(crid);
 +      hdr->target_id = cpu_to_le16(trid);
 +}
 +
 +static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
 +                              int msg_len, void *resp, int resp_max_len,
 +                              int timeout)
 +{
 +      fw_msg->msg = msg;
 +      fw_msg->msg_len = msg_len;
 +      fw_msg->resp = resp;
 +      fw_msg->resp_max_len = resp_max_len;
 +      fw_msg->timeout = timeout;
 +}
 +
 +static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id,
 +                               bool lock_wait)
 +{
 +      struct bnxt_en_dev *en_dev = rdev->en_dev;
 +      struct hwrm_ring_free_input req = {0};
 +      struct hwrm_ring_free_output resp;
 +      struct bnxt_fw_msg fw_msg;
 +      bool do_unlock = false;
 +      int rc = -EINVAL;
 +
 +      if (!en_dev)
 +              return rc;
 +
 +      memset(&fw_msg, 0, sizeof(fw_msg));
 +      if (lock_wait) {
 +              rtnl_lock();
 +              do_unlock = true;
 +      }
 +
 +      bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
 +      req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
 +      req.ring_id = cpu_to_le16(fw_ring_id);
 +      bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
 +                          sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
 +      rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
 +      if (rc)
 +              dev_err(rdev_to_dev(rdev),
 +                      "Failed to free HW ring:%d :%#x", req.ring_id, rc);
 +      if (do_unlock)
 +              rtnl_unlock();
 +      return rc;
 +}
 +
 +static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
 +                                int pages, int type, u32 ring_mask,
 +                                u32 map_index, u16 *fw_ring_id)
 +{
 +      struct bnxt_en_dev *en_dev = rdev->en_dev;
 +      struct hwrm_ring_alloc_input req = {0};
 +      struct hwrm_ring_alloc_output resp;
 +      struct bnxt_fw_msg fw_msg;
 +      int rc = -EINVAL;
 +
 +      if (!en_dev)
 +              return rc;
 +
 +      memset(&fw_msg, 0, sizeof(fw_msg));
 +      rtnl_lock();
 +      bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
 +      req.enables = 0;
 +      req.page_tbl_addr =  cpu_to_le64(dma_arr[0]);
 +      if (pages > 1) {
 +              /* Page size is in log2 units */
 +              req.page_size = BNXT_PAGE_SHIFT;
 +              req.page_tbl_depth = 1;
 +      }
 +      req.fbo = 0;
 +      /* Association of ring index with doorbell index and MSIX number */
 +      req.logical_id = cpu_to_le16(map_index);
 +      req.length = cpu_to_le32(ring_mask + 1);
 +      req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
 +      req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
 +      bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
 +                          sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
 +      rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
 +      if (!rc)
 +              *fw_ring_id = le16_to_cpu(resp.ring_id);
 +
 +      rtnl_unlock();
 +      return rc;
 +}
 +
 +static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
 +                                    u32 fw_stats_ctx_id, bool lock_wait)
 +{
 +      struct bnxt_en_dev *en_dev = rdev->en_dev;
 +      struct hwrm_stat_ctx_free_input req = {0};
 +      struct bnxt_fw_msg fw_msg;
 +      bool do_unlock = false;
 +      int rc = -EINVAL;
 +
 +      if (!en_dev)
 +              return rc;
 +
 +      memset(&fw_msg, 0, sizeof(fw_msg));
 +      if (lock_wait) {
 +              rtnl_lock();
 +              do_unlock = true;
 +      }
 +
 +      bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1);
 +      req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
 +      bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&req,
 +                          sizeof(req), DFLT_HWRM_CMD_TIMEOUT);
 +      rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
 +      if (rc)
 +              dev_err(rdev_to_dev(rdev),
 +                      "Failed to free HW stats context %#x", rc);
 +
 +      if (do_unlock)
 +              rtnl_unlock();
 +      return rc;
 +}
 +
 +static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
 +                                     dma_addr_t dma_map,
 +                                     u32 *fw_stats_ctx_id)
 +{
 +      struct hwrm_stat_ctx_alloc_output resp = {0};
 +      struct hwrm_stat_ctx_alloc_input req = {0};
 +      struct bnxt_en_dev *en_dev = rdev->en_dev;
 +      struct bnxt_fw_msg fw_msg;
 +      int rc = -EINVAL;
 +
 +      *fw_stats_ctx_id = INVALID_STATS_CTX_ID;
 +
 +      if (!en_dev)
 +              return rc;
 +
 +      memset(&fw_msg, 0, sizeof(fw_msg));
 +      rtnl_lock();
 +
 +      bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
 +      req.update_period_ms = cpu_to_le32(1000);
 +      req.stats_dma_addr = cpu_to_le64(dma_map);
 +      bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
 +                          sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
 +      rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
 +      if (!rc)
 +              *fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id);
 +
 +      rtnl_unlock();
 +      return rc;
 +}
 +
 +/* Device */
 +
 +static bool is_bnxt_re_dev(struct net_device *netdev)
 +{
 +      struct ethtool_drvinfo drvinfo;
 +
 +      if (netdev->ethtool_ops && netdev->ethtool_ops->get_drvinfo) {
 +              memset(&drvinfo, 0, sizeof(drvinfo));
 +              netdev->ethtool_ops->get_drvinfo(netdev, &drvinfo);
 +
 +              if (strcmp(drvinfo.driver, "bnxt_en"))
 +                      return false;
 +              return true;
 +      }
 +      return false;
 +}
 +
 +static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
 +{
 +      struct bnxt_re_dev *rdev;
 +
 +      rcu_read_lock();
 +      list_for_each_entry_rcu(rdev, &bnxt_re_dev_list, list) {
 +              if (rdev->netdev == netdev) {
 +                      rcu_read_unlock();
 +                      return rdev;
 +              }
 +      }
 +      rcu_read_unlock();
 +      return NULL;
 +}
 +
 +static void bnxt_re_dev_unprobe(struct net_device *netdev,
 +                              struct bnxt_en_dev *en_dev)
 +{
 +      dev_put(netdev);
 +      module_put(en_dev->pdev->driver->driver.owner);
 +}
 +
 +static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
 +{
 +      struct bnxt *bp = netdev_priv(netdev);
 +      struct bnxt_en_dev *en_dev;
 +      struct pci_dev *pdev;
 +
 +      /* Call bnxt_en's RoCE probe via indirect API */
 +      if (!bp->ulp_probe)
 +              return ERR_PTR(-EINVAL);
 +
 +      en_dev = bp->ulp_probe(netdev);
 +      if (IS_ERR(en_dev))
 +              return en_dev;
 +
 +      pdev = en_dev->pdev;
 +      if (!pdev)
 +              return ERR_PTR(-EINVAL);
 +
 +      if (!(en_dev->flags & BNXT_EN_FLAG_ROCE_CAP)) {
 +              dev_dbg(&pdev->dev,
 +                      "%s: probe error: RoCE is not supported on this device",
 +                      ROCE_DRV_MODULE_NAME);
 +              return ERR_PTR(-ENODEV);
 +      }
 +
 +      /* Bump net device reference count */
 +      if (!try_module_get(pdev->driver->driver.owner))
 +              return ERR_PTR(-ENODEV);
 +
 +      dev_hold(netdev);
 +
 +      return en_dev;
 +}
 +
 +static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
 +{
 +      ib_unregister_device(&rdev->ibdev);
 +}
 +
 +static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 +{
 +      struct ib_device *ibdev = &rdev->ibdev;
 +
 +      /* ib device init */
 +      ibdev->owner = THIS_MODULE;
 +      ibdev->node_type = RDMA_NODE_IB_CA;
 +      strlcpy(ibdev->name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
 +      strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
 +              strlen(BNXT_RE_DESC) + 5);
 +      ibdev->phys_port_cnt = 1;
 +
 +      bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid);
 +
 +      ibdev->num_comp_vectors = 1;
++      ibdev->dev.parent = &rdev->en_dev->pdev->dev;
 +      ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
 +
 +      /* User space */
 +      ibdev->uverbs_abi_ver = BNXT_RE_ABI_VERSION;
 +      ibdev->uverbs_cmd_mask =
 +                      (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
 +                      (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
 +                      (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
 +                      (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
 +                      (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
 +                      (1ull << IB_USER_VERBS_CMD_REG_MR)              |
 +                      (1ull << IB_USER_VERBS_CMD_REREG_MR)            |
 +                      (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
 +                      (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
 +                      (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
 +                      (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
 +                      (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
 +                      (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
 +                      (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
 +                      (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
 +                      (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
 +                      (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
 +                      (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
 +                      (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
 +                      (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
 +                      (1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
 +                      (1ull << IB_USER_VERBS_CMD_MODIFY_AH)           |
 +                      (1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
 +                      (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
 +      /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */
 +
 +      /* Kernel verbs */
 +      ibdev->query_device             = bnxt_re_query_device;
 +      ibdev->modify_device            = bnxt_re_modify_device;
 +
 +      ibdev->query_port               = bnxt_re_query_port;
 +      ibdev->modify_port              = bnxt_re_modify_port;
 +      ibdev->get_port_immutable       = bnxt_re_get_port_immutable;
 +      ibdev->query_pkey               = bnxt_re_query_pkey;
 +      ibdev->query_gid                = bnxt_re_query_gid;
 +      ibdev->get_netdev               = bnxt_re_get_netdev;
 +      ibdev->add_gid                  = bnxt_re_add_gid;
 +      ibdev->del_gid                  = bnxt_re_del_gid;
 +      ibdev->get_link_layer           = bnxt_re_get_link_layer;
 +
 +      ibdev->alloc_pd                 = bnxt_re_alloc_pd;
 +      ibdev->dealloc_pd               = bnxt_re_dealloc_pd;
 +
 +      ibdev->create_ah                = bnxt_re_create_ah;
 +      ibdev->modify_ah                = bnxt_re_modify_ah;
 +      ibdev->query_ah                 = bnxt_re_query_ah;
 +      ibdev->destroy_ah               = bnxt_re_destroy_ah;
 +
 +      ibdev->create_qp                = bnxt_re_create_qp;
 +      ibdev->modify_qp                = bnxt_re_modify_qp;
 +      ibdev->query_qp                 = bnxt_re_query_qp;
 +      ibdev->destroy_qp               = bnxt_re_destroy_qp;
 +
 +      ibdev->post_send                = bnxt_re_post_send;
 +      ibdev->post_recv                = bnxt_re_post_recv;
 +
 +      ibdev->create_cq                = bnxt_re_create_cq;
 +      ibdev->destroy_cq               = bnxt_re_destroy_cq;
 +      ibdev->poll_cq                  = bnxt_re_poll_cq;
 +      ibdev->req_notify_cq            = bnxt_re_req_notify_cq;
 +
 +      ibdev->get_dma_mr               = bnxt_re_get_dma_mr;
 +      ibdev->dereg_mr                 = bnxt_re_dereg_mr;
 +      ibdev->alloc_mr                 = bnxt_re_alloc_mr;
 +      ibdev->map_mr_sg                = bnxt_re_map_mr_sg;
 +      ibdev->alloc_fmr                = bnxt_re_alloc_fmr;
 +      ibdev->map_phys_fmr             = bnxt_re_map_phys_fmr;
 +      ibdev->unmap_fmr                = bnxt_re_unmap_fmr;
 +      ibdev->dealloc_fmr              = bnxt_re_dealloc_fmr;
 +
 +      ibdev->reg_user_mr              = bnxt_re_reg_user_mr;
 +      ibdev->alloc_ucontext           = bnxt_re_alloc_ucontext;
 +      ibdev->dealloc_ucontext         = bnxt_re_dealloc_ucontext;
 +      ibdev->mmap                     = bnxt_re_mmap;
 +
 +      return ib_register_device(ibdev, NULL);
 +}
 +
 +static ssize_t show_rev(struct device *device, struct device_attribute *attr,
 +                      char *buf)
 +{
 +      struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
 +
 +      return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
 +}
 +
 +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
 +                         char *buf)
 +{
 +      struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
 +
 +      return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->dev_attr.fw_ver);
 +}
 +
 +static ssize_t show_hca(struct device *device, struct device_attribute *attr,
 +                      char *buf)
 +{
 +      struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
 +
 +      return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
 +}
 +
 +static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
 +static DEVICE_ATTR(fw_rev, 0444, show_fw_ver, NULL);
 +static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
 +
 +static struct device_attribute *bnxt_re_attributes[] = {
 +      &dev_attr_hw_rev,
 +      &dev_attr_fw_rev,
 +      &dev_attr_hca_type
 +};
 +
 +static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
 +{
 +      dev_put(rdev->netdev);
 +      rdev->netdev = NULL;
 +
 +      mutex_lock(&bnxt_re_dev_lock);
 +      list_del_rcu(&rdev->list);
 +      mutex_unlock(&bnxt_re_dev_lock);
 +
 +      synchronize_rcu();
 +      flush_workqueue(bnxt_re_wq);
 +
 +      ib_dealloc_device(&rdev->ibdev);
 +      /* rdev is gone */
 +}
 +
 +static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
 +                                         struct bnxt_en_dev *en_dev)
 +{
 +      struct bnxt_re_dev *rdev;
 +
 +      /* Allocate bnxt_re_dev instance here */
 +      rdev = (struct bnxt_re_dev *)ib_alloc_device(sizeof(*rdev));
 +      if (!rdev) {
 +              dev_err(NULL, "%s: bnxt_re_dev allocation failure!",
 +                      ROCE_DRV_MODULE_NAME);
 +              return NULL;
 +      }
 +      /* Default values */
 +      rdev->netdev = netdev;
 +      dev_hold(rdev->netdev);
 +      rdev->en_dev = en_dev;
 +      rdev->id = rdev->en_dev->pdev->devfn;
 +      INIT_LIST_HEAD(&rdev->qp_list);
 +      mutex_init(&rdev->qp_lock);
 +      atomic_set(&rdev->qp_count, 0);
 +      atomic_set(&rdev->cq_count, 0);
 +      atomic_set(&rdev->srq_count, 0);
 +      atomic_set(&rdev->mr_count, 0);
 +      atomic_set(&rdev->mw_count, 0);
 +      rdev->cosq[0] = 0xFFFF;
 +      rdev->cosq[1] = 0xFFFF;
 +
 +      mutex_lock(&bnxt_re_dev_lock);
 +      list_add_tail_rcu(&rdev->list, &bnxt_re_dev_list);
 +      mutex_unlock(&bnxt_re_dev_lock);
 +      return rdev;
 +}
 +
 +static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
 +                             struct creq_func_event *aeqe)
 +{
 +      switch (aeqe->event) {
 +      case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
 +              break;
 +      case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
 +              break;
 +      case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
 +              break;
 +      case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
 +              break;
 +      case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
 +              break;
 +      case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
 +              break;
 +      case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
 +              break;
 +      case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
 +              break;
 +      case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
 +              break;
 +      case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
 +              break;
 +      case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
 +              break;
 +      default:
 +              return -EINVAL;
 +      }
 +      return 0;
 +}
 +
 +static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
 +                             struct bnxt_qplib_cq *handle)
 +{
 +      struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq,
 +                                           qplib_cq);
 +
 +      if (!cq) {
 +              dev_err(NULL, "%s: CQ is NULL, CQN not handled",
 +                      ROCE_DRV_MODULE_NAME);
 +              return -EINVAL;
 +      }
 +      if (cq->ib_cq.comp_handler) {
 +              /* Lock comp_handler? */
 +              (*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context);
 +      }
 +
 +      return 0;
 +}
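
bnxt_re_cqn_handler() above receives a pointer to the embedded bnxt_qplib_cq and recovers the enclosing bnxt_re_cq with container_of(). A standalone plain-C illustration of that pointer arithmetic; the struct names are invented for the example and the macro is a simplified stand-in for the kernel's:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct qplib_cq { int id; };

struct re_cq {
        int comp_count;
        struct qplib_cq qplib_cq;       /* embedded, as in struct bnxt_re_cq */
};

int main(void)
{
        struct re_cq cq = { .comp_count = 0, .qplib_cq = { .id = 7 } };
        struct qplib_cq *handle = &cq.qplib_cq;   /* what the NQ hands back */
        struct re_cq *owner = container_of(handle, struct re_cq, qplib_cq);

        printf("recovered owner, qplib id %d\n", owner->qplib_cq.id);
        return 0;
}
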
 +
 +static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
 +{
 +      if (rdev->nq.hwq.max_elements)
 +              bnxt_qplib_disable_nq(&rdev->nq);
 +
 +      if (rdev->qplib_res.rcfw)
 +              bnxt_qplib_cleanup_res(&rdev->qplib_res);
 +}
 +
 +static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
 +{
 +      int rc = 0;
 +
 +      bnxt_qplib_init_res(&rdev->qplib_res);
 +
 +      if (rdev->msix_entries[BNXT_RE_NQ_IDX].vector <= 0)
 +              return -EINVAL;
 +
 +      rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq,
 +                                rdev->msix_entries[BNXT_RE_NQ_IDX].vector,
 +                                rdev->msix_entries[BNXT_RE_NQ_IDX].db_offset,
 +                                &bnxt_re_cqn_handler,
 +                                NULL);
 +
 +      if (rc)
 +              dev_err(rdev_to_dev(rdev), "Failed to enable NQ: %#x", rc);
 +
 +      return rc;
 +}
 +
 +static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait)
 +{
 +      if (rdev->nq.hwq.max_elements) {
 +              bnxt_re_net_ring_free(rdev, rdev->nq.ring_id, lock_wait);
 +              bnxt_qplib_free_nq(&rdev->nq);
 +      }
 +      if (rdev->qplib_res.dpi_tbl.max) {
 +              bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
 +                                     &rdev->qplib_res.dpi_tbl,
 +                                     &rdev->dpi_privileged);
 +      }
 +      if (rdev->qplib_res.rcfw) {
 +              bnxt_qplib_free_res(&rdev->qplib_res);
 +              rdev->qplib_res.rcfw = NULL;
 +      }
 +}
 +
 +static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
 +{
 +      int rc = 0;
 +
 +      /* Configure and allocate resources for qplib */
 +      rdev->qplib_res.rcfw = &rdev->rcfw;
 +      rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
 +      if (rc)
 +              goto fail;
 +
 +      rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev,
 +                                rdev->netdev, &rdev->dev_attr);
 +      if (rc)
 +              goto fail;
 +
 +      rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
 +                                &rdev->dpi_privileged,
 +                                rdev);
 +      if (rc)
 +              goto fail;
 +
 +      rdev->nq.hwq.max_elements = BNXT_RE_MAX_CQ_COUNT +
 +                                  BNXT_RE_MAX_SRQC_COUNT + 2;
 +      rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq);
 +      if (rc) {
 +              dev_err(rdev_to_dev(rdev),
 +                      "Failed to allocate NQ memory: %#x", rc);
 +              goto fail;
 +      }
 +      rc = bnxt_re_net_ring_alloc
 +                      (rdev, rdev->nq.hwq.pbl[PBL_LVL_0].pg_map_arr,
 +                       rdev->nq.hwq.pbl[rdev->nq.hwq.level].pg_count,
 +                       HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_NQE_MAX_CNT - 1,
 +                       rdev->msix_entries[BNXT_RE_NQ_IDX].ring_idx,
 +                       &rdev->nq.ring_id);
 +      if (rc) {
 +              dev_err(rdev_to_dev(rdev),
 +                      "Failed to allocate NQ ring: %#x", rc);
 +              goto free_nq;
 +      }
 +      return 0;
 +free_nq:
 +      bnxt_qplib_free_nq(&rdev->nq);
 +fail:
 +      rdev->qplib_res.rcfw = NULL;
 +      return rc;
 +}
 +
 +static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
 +                                 u8 port_num, enum ib_event_type event)
 +{
 +      struct ib_event ib_event;
 +
 +      ib_event.device = ibdev;
 +      if (qp)
 +              ib_event.element.qp = qp;
 +      else
 +              ib_event.element.port_num = port_num;
 +      ib_event.event = event;
 +      ib_dispatch_event(&ib_event);
 +}
 +
 +#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN      0x02
 +static int bnxt_re_query_hwrm_pri2cos(struct bnxt_re_dev *rdev, u8 dir,
 +                                    u64 *cid_map)
 +{
 +      struct hwrm_queue_pri2cos_qcfg_input req = {0};
 +      struct bnxt *bp = netdev_priv(rdev->netdev);
 +      struct hwrm_queue_pri2cos_qcfg_output resp;
 +      struct bnxt_en_dev *en_dev = rdev->en_dev;
 +      struct bnxt_fw_msg fw_msg;
 +      u32 flags = 0;
 +      u8 *qcfgmap, *tmp_map;
 +      int rc = 0, i;
 +
 +      if (!cid_map)
 +              return -EINVAL;
 +
 +      memset(&fw_msg, 0, sizeof(fw_msg));
 +      bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
 +                            HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
 +      flags |= (dir & 0x01);
 +      flags |= HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN;
 +      req.flags = cpu_to_le32(flags);
 +      req.port_id = bp->pf.port_id;
 +
 +      bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
 +                          sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
 +      rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
 +      if (rc)
 +              return rc;
 +
 +      if (resp.queue_cfg_info) {
 +              dev_warn(rdev_to_dev(rdev),
 +                       "Asymmetric cos queue configuration detected");
 +              dev_warn(rdev_to_dev(rdev),
 +                       " on device, QoS may not be fully functional\n");
 +      }
 +      qcfgmap = &resp.pri0_cos_queue_id;
 +      tmp_map = (u8 *)cid_map;
 +      for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
 +              tmp_map[i] = qcfgmap[i];
 +
 +      return rc;
 +}
 +
 +static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
 +                                      struct bnxt_re_qp *qp)
 +{
 +      return (qp->ib_qp.qp_type == IB_QPT_GSI) || (qp == rdev->qp1_sqp);
 +}
 +
 +static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
 +{
 +      int mask = IB_QP_STATE;
 +      struct ib_qp_attr qp_attr;
 +      struct bnxt_re_qp *qp;
 +
 +      qp_attr.qp_state = IB_QPS_ERR;
 +      mutex_lock(&rdev->qp_lock);
 +      list_for_each_entry(qp, &rdev->qp_list, list) {
 +              /* Modify the state of all QPs except QP1/Shadow QP */
 +              if (!bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) {
 +                      if (qp->qplib_qp.state !=
 +                          CMDQ_MODIFY_QP_NEW_STATE_RESET &&
 +                          qp->qplib_qp.state !=
 +                          CMDQ_MODIFY_QP_NEW_STATE_ERR) {
 +                              bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp,
 +                                                     1, IB_EVENT_QP_FATAL);
 +                              bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, mask,
 +                                                NULL);
 +                      }
 +              }
 +      }
 +      mutex_unlock(&rdev->qp_lock);
 +}
 +
 +static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
 +{
 +      u32 prio_map = 0, tmp_map = 0;
 +      struct net_device *netdev;
 +      struct dcb_app app;
 +
 +      netdev = rdev->netdev;
 +
 +      memset(&app, 0, sizeof(app));
 +      app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
 +      app.protocol = ETH_P_IBOE;
 +      tmp_map = dcb_ieee_getapp_mask(netdev, &app);
 +      prio_map = tmp_map;
 +
 +      app.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
 +      app.protocol = ROCE_V2_UDP_DPORT;
 +      tmp_map = dcb_ieee_getapp_mask(netdev, &app);
 +      prio_map |= tmp_map;
 +
 +      if (!prio_map)
 +              prio_map = -EFAULT;
 +      return prio_map;
 +}
 +
 +static void bnxt_re_parse_cid_map(u8 prio_map, u8 *cid_map, u16 *cosq)
 +{
 +      u16 prio;
 +      u8 id;
 +
 +      for (prio = 0, id = 0; prio < 8; prio++) {
 +              if (prio_map & (1 << prio)) {
 +                      cosq[id] = cid_map[prio];
 +                      id++;
 +                      if (id == 2) /* Max 2 tcs supported */
 +                              break;
 +              }
 +      }
 +}
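
bnxt_re_parse_cid_map() above scans priority bits 0-7 and copies at most two CoS queue ids out of cid_map into cosq[]. A standalone walk-through of the same loop with made-up values (priorities 3 and 5 enabled), just to show the shape of the result:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint8_t prio_map = 0x28;                 /* bits 3 and 5 set */
        uint8_t cid_map[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };
        uint16_t cosq[2] = { 0xFFFF, 0xFFFF };   /* same default as rdev->cosq */
        int id = 0;

        for (int prio = 0; prio < 8 && id < 2; prio++)
                if (prio_map & (1 << prio))
                        cosq[id++] = cid_map[prio];

        /* Prints: cosq[0]=13 cosq[1]=15 */
        printf("cosq[0]=%u cosq[1]=%u\n", (unsigned)cosq[0], (unsigned)cosq[1]);
        return 0;
}
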
 +
 +static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
 +{
 +      u8 prio_map = 0;
 +      u64 cid_map;
 +      int rc;
 +
 +      /* Get priority for roce */
 +      rc = bnxt_re_get_priority_mask(rdev);
 +      if (rc < 0)
 +              return rc;
 +      prio_map = (u8)rc;
 +
 +      if (prio_map == rdev->cur_prio_map)
 +              return 0;
 +      rdev->cur_prio_map = prio_map;
 +      /* Get cosq id for this priority */
 +      rc = bnxt_re_query_hwrm_pri2cos(rdev, 0, &cid_map);
 +      if (rc) {
 +              dev_warn(rdev_to_dev(rdev), "no cos for p_mask %x\n", prio_map);
 +              return rc;
 +      }
 +      /* Parse CoS IDs for app priority */
 +      bnxt_re_parse_cid_map(prio_map, (u8 *)&cid_map, rdev->cosq);
 +
 +      /* Config BONO. */
 +      rc = bnxt_qplib_map_tc2cos(&rdev->qplib_res, rdev->cosq);
 +      if (rc) {
 +              dev_warn(rdev_to_dev(rdev), "no tc for cos{%x, %x}\n",
 +                       rdev->cosq[0], rdev->cosq[1]);
 +              return rc;
 +      }
 +
 +      return 0;
 +}
 +
 +static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
 +{
 +      int i, rc;
 +
 +      if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
 +              for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
 +                      device_remove_file(&rdev->ibdev.dev,
 +                                         bnxt_re_attributes[i]);
 +              /* Cleanup ib dev */
 +              bnxt_re_unregister_ib(rdev);
 +      }
 +      if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
 +              cancel_delayed_work(&rdev->worker);
 +
 +      bnxt_re_cleanup_res(rdev);
 +      bnxt_re_free_res(rdev, lock_wait);
 +
 +      if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
 +              rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
 +              if (rc)
 +                      dev_warn(rdev_to_dev(rdev),
 +                               "Failed to deinitialize RCFW: %#x", rc);
 +              bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id,
 +                                         lock_wait);
 +              bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
 +              bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
 +              bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, lock_wait);
 +              bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
 +      }
 +      if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
 +              rc = bnxt_re_free_msix(rdev, lock_wait);
 +              if (rc)
 +                      dev_warn(rdev_to_dev(rdev),
 +                               "Failed to free MSI-X vectors: %#x", rc);
 +      }
 +      if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
 +              rc = bnxt_re_unregister_netdev(rdev, lock_wait);
 +              if (rc)
 +                      dev_warn(rdev_to_dev(rdev),
 +                               "Failed to unregister with netdev: %#x", rc);
 +      }
 +}
 +
 +static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
 +{
 +      u32 i;
 +
 +      rdev->qplib_ctx.qpc_count = BNXT_RE_MAX_QPC_COUNT;
 +      rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT;
 +      rdev->qplib_ctx.srqc_count = BNXT_RE_MAX_SRQC_COUNT;
 +      rdev->qplib_ctx.cq_count = BNXT_RE_MAX_CQ_COUNT;
 +      for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
 +              rdev->qplib_ctx.tqm_count[i] =
 +              rdev->dev_attr.tqm_alloc_reqs[i];
 +}
 +
 +/* Worker thread for polling periodic events. Now used for QoS programming */
 +static void bnxt_re_worker(struct work_struct *work)
 +{
 +      struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
 +                                              worker.work);
 +
 +      bnxt_re_setup_qos(rdev);
 +      schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
 +}
 +
 +static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 +{
 +      int i, j, rc;
 +
 +      /* Register a new RoCE device instance with netdev */
 +      rc = bnxt_re_register_netdev(rdev);
 +      if (rc) {
 +              pr_err("Failed to register with netdev: %#x\n", rc);
 +              return -EINVAL;
 +      }
 +      set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
 +
 +      rc = bnxt_re_request_msix(rdev);
 +      if (rc) {
 +              pr_err("Failed to get MSI-X vectors: %#x\n", rc);
 +              rc = -EINVAL;
 +              goto fail;
 +      }
 +      set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);
 +
 +      /* Establish RCFW Communication Channel to initialize the context
 +       * memory for the function and all child VFs
 +       */
 +      rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw);
 +      if (rc)
 +              goto fail;
 +
 +      rc = bnxt_re_net_ring_alloc
 +                      (rdev, rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr,
 +                       rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count,
 +                       HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_CREQE_MAX_CNT - 1,
 +                       rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx,
 +                       &rdev->rcfw.creq_ring_id);
 +      if (rc) {
 +              pr_err("Failed to allocate CREQ: %#x\n", rc);
 +              goto free_rcfw;
 +      }
 +      rc = bnxt_qplib_enable_rcfw_channel
 +                              (rdev->en_dev->pdev, &rdev->rcfw,
 +                               rdev->msix_entries[BNXT_RE_AEQ_IDX].vector,
 +                               rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset,
 +                               0, &bnxt_re_aeq_handler);
 +      if (rc) {
 +              pr_err("Failed to enable RCFW channel: %#x\n", rc);
 +              goto free_ring;
 +      }
 +
 +      rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
 +      if (rc)
 +              goto disable_rcfw;
 +      bnxt_re_set_resource_limits(rdev);
 +
 +      rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0);
 +      if (rc) {
 +              pr_err("Failed to allocate QPLIB context: %#x\n", rc);
 +              goto disable_rcfw;
 +      }
 +      rc = bnxt_re_net_stats_ctx_alloc(rdev,
 +                                       rdev->qplib_ctx.stats.dma_map,
 +                                       &rdev->qplib_ctx.stats.fw_id);
 +      if (rc) {
 +              pr_err("Failed to allocate stats context: %#x\n", rc);
 +              goto free_ctx;
 +      }
 +
 +      rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx, 0);
 +      if (rc) {
 +              pr_err("Failed to initialize RCFW: %#x\n", rc);
 +              goto free_sctx;
 +      }
 +      set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
 +
 +      /* Resources based on the 'new' device caps */
 +      rc = bnxt_re_alloc_res(rdev);
 +      if (rc) {
 +              pr_err("Failed to allocate resources: %#x\n", rc);
 +              goto fail;
 +      }
 +      rc = bnxt_re_init_res(rdev);
 +      if (rc) {
 +              pr_err("Failed to initialize resources: %#x\n", rc);
 +              goto fail;
 +      }
 +
 +      rc = bnxt_re_setup_qos(rdev);
 +      if (rc)
 +              pr_info("RoCE priority not yet configured\n");
 +
 +      INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
 +      set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
 +      schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
 +
 +      /* Register ib dev */
 +      rc = bnxt_re_register_ib(rdev);
 +      if (rc) {
 +              pr_err("Failed to register with IB: %#x\n", rc);
 +              goto fail;
 +      }
 +      dev_info(rdev_to_dev(rdev), "Device registered successfully");
 +      for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
 +              rc = device_create_file(&rdev->ibdev.dev,
 +                                      bnxt_re_attributes[i]);
 +              if (rc) {
 +                      dev_err(rdev_to_dev(rdev),
 +                              "Failed to create IB sysfs: %#x", rc);
 +                      /* Must clean up all created device files */
 +                      for (j = 0; j < i; j++)
 +                              device_remove_file(&rdev->ibdev.dev,
 +                                                 bnxt_re_attributes[j]);
 +                      bnxt_re_unregister_ib(rdev);
 +                      goto fail;
 +              }
 +      }
 +      set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
 +      bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
 +      bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
 +
 +      return 0;
 +free_sctx:
 +      bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id, true);
 +free_ctx:
 +      bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
 +disable_rcfw:
 +      bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
 +free_ring:
 +      bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, true);
 +free_rcfw:
 +      bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
 +fail:
 +      bnxt_re_ib_unreg(rdev, true);
 +      return rc;
 +}
 +
 +static void bnxt_re_dev_unreg(struct bnxt_re_dev *rdev)
 +{
 +      struct bnxt_en_dev *en_dev = rdev->en_dev;
 +      struct net_device *netdev = rdev->netdev;
 +
 +      bnxt_re_dev_remove(rdev);
 +
 +      if (netdev)
 +              bnxt_re_dev_unprobe(netdev, en_dev);
 +}
 +
 +static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct net_device *netdev)
 +{
 +      struct bnxt_en_dev *en_dev;
 +      int rc = 0;
 +
 +      if (!is_bnxt_re_dev(netdev))
 +              return -ENODEV;
 +
 +      en_dev = bnxt_re_dev_probe(netdev);
 +      if (IS_ERR(en_dev)) {
 +              if (en_dev != ERR_PTR(-ENODEV))
 +                      pr_err("%s: Failed to probe\n", ROCE_DRV_MODULE_NAME);
 +              rc = PTR_ERR(en_dev);
 +              goto exit;
 +      }
 +      *rdev = bnxt_re_dev_add(netdev, en_dev);
 +      if (!*rdev) {
 +              rc = -ENOMEM;
 +              bnxt_re_dev_unprobe(netdev, en_dev);
 +              goto exit;
 +      }
 +exit:
 +      return rc;
 +}
 +
 +static void bnxt_re_remove_one(struct bnxt_re_dev *rdev)
 +{
 +      pci_dev_put(rdev->en_dev->pdev);
 +}
 +
 +/* Handle all deferred netdev event tasks */
 +static void bnxt_re_task(struct work_struct *work)
 +{
 +      struct bnxt_re_work *re_work;
 +      struct bnxt_re_dev *rdev;
 +      int rc = 0;
 +
 +      re_work = container_of(work, struct bnxt_re_work, work);
 +      rdev = re_work->rdev;
 +
 +      if (re_work->event != NETDEV_REGISTER &&
 +          !test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
 +              return;
 +
 +      switch (re_work->event) {
 +      case NETDEV_REGISTER:
 +              rc = bnxt_re_ib_reg(rdev);
 +              if (rc)
 +                      dev_err(rdev_to_dev(rdev),
 +                              "Failed to register with IB: %#x", rc);
 +              break;
 +      case NETDEV_UP:
 +              bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
 +                                     IB_EVENT_PORT_ACTIVE);
 +              break;
 +      case NETDEV_DOWN:
 +              bnxt_re_dev_stop(rdev);
 +              break;
 +      case NETDEV_CHANGE:
 +              if (!netif_carrier_ok(rdev->netdev))
 +                      bnxt_re_dev_stop(rdev);
 +              else if (netif_carrier_ok(rdev->netdev))
 +                      bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
 +                                             IB_EVENT_PORT_ACTIVE);
 +              break;
 +      default:
 +              break;
 +      }
 +      kfree(re_work);
 +}
 +
 +static void bnxt_re_init_one(struct bnxt_re_dev *rdev)
 +{
 +      pci_dev_get(rdev->en_dev->pdev);
 +}
 +
 +/*
 + * "Notifier chain callback can be invoked for the same chain from
 + * different CPUs at the same time".
 + *
 + * For cases when the netdev is already present, our call to
 + * register_netdevice_notifier() will take the rtnl_lock() before
 + * sending NETDEV_REGISTER and (if up) NETDEV_UP events.
 + *
 + * But for cases when the netdev is not already present, the notifier
 + * chain may be invoked from different CPUs simultaneously.
 + *
 + * This is protected by the netdev_mutex.
 + */
 +static int bnxt_re_netdev_event(struct notifier_block *notifier,
 +                              unsigned long event, void *ptr)
 +{
 +      struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr);
 +      struct bnxt_re_work *re_work;
 +      struct bnxt_re_dev *rdev;
 +      int rc = 0;
 +      bool sch_work = false;
 +
 +      real_dev = rdma_vlan_dev_real_dev(netdev);
 +      if (!real_dev)
 +              real_dev = netdev;
 +
 +      rdev = bnxt_re_from_netdev(real_dev);
 +      if (!rdev && event != NETDEV_REGISTER)
 +              goto exit;
 +      if (real_dev != netdev)
 +              goto exit;
 +
 +      switch (event) {
 +      case NETDEV_REGISTER:
 +              if (rdev)
 +                      break;
 +              rc = bnxt_re_dev_reg(&rdev, real_dev);
 +              if (rc == -ENODEV)
 +                      break;
 +              if (rc) {
 +                      pr_err("Failed to register with the device %s: %#x\n",
 +                             real_dev->name, rc);
 +                      break;
 +              }
 +              bnxt_re_init_one(rdev);
 +              sch_work = true;
 +              break;
 +
 +      case NETDEV_UNREGISTER:
 +              bnxt_re_ib_unreg(rdev, false);
 +              bnxt_re_remove_one(rdev);
 +              bnxt_re_dev_unreg(rdev);
 +              break;
 +
 +      default:
 +              sch_work = true;
 +              break;
 +      }
 +      if (sch_work) {
 +              /* Allocate for the deferred task */
 +              re_work = kzalloc(sizeof(*re_work), GFP_ATOMIC);
 +              if (re_work) {
 +                      re_work->rdev = rdev;
 +                      re_work->event = event;
 +                      re_work->vlan_dev = (real_dev == netdev ?
 +                                           NULL : netdev);
 +                      INIT_WORK(&re_work->work, bnxt_re_task);
 +                      queue_work(bnxt_re_wq, &re_work->work);
 +              }
 +      }
 +
 +exit:
 +      return NOTIFY_DONE;
 +}
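
The notifier callback above runs in a context where it must not sleep, so it only allocates a small work item with GFP_ATOMIC and queues it; bnxt_re_task() then does the real work in process context and frees the item. A minimal kernel-style sketch of that defer-to-workqueue shape, using hypothetical names (demo_work, demo_task) rather than the driver's own:

#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_work {
        struct work_struct work;
        unsigned long event;
};

static void demo_task(struct work_struct *work)
{
        struct demo_work *dw = container_of(work, struct demo_work, work);

        /* Runs in process context: sleeping is allowed here. */
        /* ... handle dw->event ... */
        kfree(dw);
}

static bool demo_defer(struct workqueue_struct *wq, unsigned long event)
{
        struct demo_work *dw = kzalloc(sizeof(*dw), GFP_ATOMIC);

        if (!dw)
                return false;
        dw->event = event;
        INIT_WORK(&dw->work, demo_task);
        return queue_work(wq, &dw->work);
}
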
 +
 +static struct notifier_block bnxt_re_netdev_notifier = {
 +      .notifier_call = bnxt_re_netdev_event
 +};
 +
 +static int __init bnxt_re_mod_init(void)
 +{
 +      int rc = 0;
 +
 +      pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
 +
 +      bnxt_re_wq = create_singlethread_workqueue("bnxt_re");
 +      if (!bnxt_re_wq)
 +              return -ENOMEM;
 +
 +      INIT_LIST_HEAD(&bnxt_re_dev_list);
 +
 +      rc = register_netdevice_notifier(&bnxt_re_netdev_notifier);
 +      if (rc) {
 +              pr_err("%s: Cannot register with netdevice_notifier",
 +                     ROCE_DRV_MODULE_NAME);
 +              goto err_netdev;
 +      }
 +      return 0;
 +
 +err_netdev:
 +      destroy_workqueue(bnxt_re_wq);
 +
 +      return rc;
 +}
 +
 +static void __exit bnxt_re_mod_exit(void)
 +{
 +      unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
 +      if (bnxt_re_wq)
 +              destroy_workqueue(bnxt_re_wq);
 +}
 +
 +module_init(bnxt_re_mod_init);
 +module_exit(bnxt_re_mod_exit);
index 48649f93258a41e8ecf8b4214d4d518a28a07318,8db71dcb4599028c4cae0eb23dcd3393c882395a..318ec5267bdfe1277181ae69980f5da19b311fce
@@@ -1133,9 -1133,18 +1133,9 @@@ static int iwch_query_port(struct ib_de
        dev = to_iwch_dev(ibdev);
        netdev = dev->rdev.port_info.lldevs[port-1];
  
 -      memset(props, 0, sizeof(struct ib_port_attr));
 +      /* props being zeroed by the caller, avoid zeroing it here */
        props->max_mtu = IB_MTU_4096;
 -      if (netdev->mtu >= 4096)
 -              props->active_mtu = IB_MTU_4096;
 -      else if (netdev->mtu >= 2048)
 -              props->active_mtu = IB_MTU_2048;
 -      else if (netdev->mtu >= 1024)
 -              props->active_mtu = IB_MTU_1024;
 -      else if (netdev->mtu >= 512)
 -              props->active_mtu = IB_MTU_512;
 -      else
 -              props->active_mtu = IB_MTU_256;
 +      props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
  
        if (!netif_carrier_ok(netdev))
                props->state = IB_PORT_DOWN;
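
Several hunks in this series (here, and the c4iw and i40iw hunks below) replace an open-coded MTU ladder with the new ib_mtu_int_to_enum() helper. A standalone sketch of the mapping the removed ladder performed, so it is clear what the helper is expected to return; the enum names and values here are illustrative, not the ib_verbs.h definitions:

#include <stdio.h>

enum demo_mtu { DEMO_MTU_256, DEMO_MTU_512, DEMO_MTU_1024,
                DEMO_MTU_2048, DEMO_MTU_4096 };

/* Same thresholds as the deleted if/else chain above. */
static enum demo_mtu demo_mtu_int_to_enum(int mtu)
{
        if (mtu >= 4096)
                return DEMO_MTU_4096;
        if (mtu >= 2048)
                return DEMO_MTU_2048;
        if (mtu >= 1024)
                return DEMO_MTU_1024;
        if (mtu >= 512)
                return DEMO_MTU_512;
        return DEMO_MTU_256;
}

int main(void)
{
        /* A 1500-byte Ethernet MTU clamps down to the 1024 bucket. */
        printf("%d\n", demo_mtu_int_to_enum(1500) == DEMO_MTU_1024);
        return 0;
}
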
@@@ -1329,14 -1338,13 +1329,14 @@@ static int iwch_port_immutable(struct i
        struct ib_port_attr attr;
        int err;
  
 -      err = iwch_query_port(ibdev, port_num, &attr);
 +      immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
 +
 +      err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
  
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 -      immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
  
        return 0;
  }
@@@ -1393,7 -1401,7 +1393,7 @@@ int iwch_register_device(struct iwch_de
        memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
        dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
        dev->ibdev.num_comp_vectors = 1;
-       dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
+       dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;
        dev->ibdev.query_device = iwch_query_device;
        dev->ibdev.query_port = iwch_query_port;
        dev->ibdev.query_pkey = iwch_query_pkey;
index bdf7de571d838d824dd5fc57d8e357a35ddfe84a,6daf6d95e620a300f3c5f4e70b24de002d798442..df64417ab6f24a874f049f779c1cd2a917e92c77
@@@ -93,28 -93,17 +93,28 @@@ static int c4iw_process_mad(struct ib_d
        return -ENOSYS;
  }
  
 -static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
 +void _c4iw_free_ucontext(struct kref *kref)
  {
 -      struct c4iw_dev *rhp = to_c4iw_dev(context->device);
 -      struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
 +      struct c4iw_ucontext *ucontext;
 +      struct c4iw_dev *rhp;
        struct c4iw_mm_entry *mm, *tmp;
  
 -      PDBG("%s context %p\n", __func__, context);
 +      ucontext = container_of(kref, struct c4iw_ucontext, kref);
 +      rhp = to_c4iw_dev(ucontext->ibucontext.device);
 +
 +      PDBG("%s ucontext %p\n", __func__, ucontext);
        list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
                kfree(mm);
        c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
        kfree(ucontext);
 +}
 +
 +static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
 +{
 +      struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
 +
 +      PDBG("%s context %p\n", __func__, context);
 +      c4iw_put_ucontext(ucontext);
        return 0;
  }
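
The hunk above moves c4iw ucontext teardown behind a kref: c4iw_dealloc_ucontext() now only drops a reference, and _c4iw_free_ucontext() runs when the last holder lets go. A standalone toy version of that reference-counting shape; the struct kref here is a simplified stand-in for the kernel's, and all names are hypothetical:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct kref { int refcount; };          /* toy stand-in, not <linux/kref.h> */

static void kref_init(struct kref *k) { k->refcount = 1; }
static void kref_get(struct kref *k)  { k->refcount++; }
static void kref_put(struct kref *k, void (*release)(struct kref *))
{
        if (--k->refcount == 0)
                release(k);
}

struct demo_ucontext { struct kref kref; };

static void demo_free_ucontext(struct kref *kref)
{
        struct demo_ucontext *uc =
                container_of(kref, struct demo_ucontext, kref);

        printf("last reference dropped, freeing ucontext\n");
        free(uc);
}

int main(void)
{
        struct demo_ucontext *uc = malloc(sizeof(*uc));

        kref_init(&uc->kref);                    /* alloc_ucontext path */
        kref_get(&uc->kref);                     /* e.g. a QP takes a ref */
        kref_put(&uc->kref, demo_free_ucontext); /* QP destroyed */
        kref_put(&uc->kref, demo_free_ucontext); /* dealloc_ucontext: frees */
        return 0;
}
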
  
@@@ -138,7 -127,6 +138,7 @@@ static struct ib_ucontext *c4iw_alloc_u
        c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
        INIT_LIST_HEAD(&context->mmaps);
        spin_lock_init(&context->mmap_lock);
 +      kref_init(&context->kref);
  
        if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
                if (!warned++)
@@@ -370,9 -358,19 +370,9 @@@ static int c4iw_query_port(struct ib_de
  
        dev = to_c4iw_dev(ibdev);
        netdev = dev->rdev.lldi.ports[port-1];
 -
 -      memset(props, 0, sizeof(struct ib_port_attr));
 +      /* props being zeroed by the caller, avoid zeroing it here */
        props->max_mtu = IB_MTU_4096;
 -      if (netdev->mtu >= 4096)
 -              props->active_mtu = IB_MTU_4096;
 -      else if (netdev->mtu >= 2048)
 -              props->active_mtu = IB_MTU_2048;
 -      else if (netdev->mtu >= 1024)
 -              props->active_mtu = IB_MTU_1024;
 -      else if (netdev->mtu >= 512)
 -              props->active_mtu = IB_MTU_512;
 -      else
 -              props->active_mtu = IB_MTU_256;
 +      props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
  
        if (!netif_carrier_ok(netdev))
                props->state = IB_PORT_DOWN;
@@@ -507,14 -505,13 +507,14 @@@ static int c4iw_port_immutable(struct i
        struct ib_port_attr attr;
        int err;
  
 -      err = c4iw_query_port(ibdev, port_num, &attr);
 +      immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
 +
 +      err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
  
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 -      immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
  
        return 0;
  }
@@@ -572,7 -569,7 +572,7 @@@ int c4iw_register_device(struct c4iw_de
        memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
        dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
        dev->ibdev.num_comp_vectors =  dev->rdev.lldi.nciq;
-       dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev);
+       dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
        dev->ibdev.query_device = c4iw_query_device;
        dev->ibdev.query_port = c4iw_query_port;
        dev->ibdev.query_pkey = c4iw_query_pkey;
        dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
        dev->ibdev.get_port_immutable = c4iw_port_immutable;
        dev->ibdev.get_dev_fw_str = get_dev_fw_str;
 -      dev->ibdev.drain_sq = c4iw_drain_sq;
 -      dev->ibdev.drain_rq = c4iw_drain_rq;
  
        dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
        if (!dev->ibdev.iwcm)
index 33f00f0719c561acec89667bed0566fe8432bb71,2c9efae729720b667a61a9b60decd1d4f8e866db..222315fadab11e00f36335ed8618fd72b73bd851
@@@ -291,7 -291,7 +291,7 @@@ static void wss_insert(void *address
  /*
   * Is the working set larger than the threshold?
   */
 -static inline int wss_exceeds_threshold(void)
 +static inline bool wss_exceeds_threshold(void)
  {
        return atomic_read(&wss.total_count) >= wss.threshold;
  }
@@@ -419,19 -419,18 +419,19 @@@ __be64 ib_hfi1_sys_image_guid
   * @ss: the SGE state
   * @data: the data to copy
   * @length: the length of the data
 + * @release: boolean to release MR
   * @copy_last: do a separate copy of the last 8 bytes
   */
  void hfi1_copy_sge(
        struct rvt_sge_state *ss,
        void *data, u32 length,
 -      int release,
 -      int copy_last)
 +      bool release,
 +      bool copy_last)
  {
        struct rvt_sge *sge = &ss->sge;
 -      int in_last = 0;
        int i;
 -      int cacheless_copy = 0;
 +      bool in_last = false;
 +      bool cacheless_copy = false;
  
        if (sge_copy_mode == COPY_CACHELESS) {
                cacheless_copy = length >= PAGE_SIZE;
                if (length > 8) {
                        length -= 8;
                } else {
 -                      copy_last = 0;
 -                      in_last = 1;
 +                      copy_last = false;
 +                      in_last = true;
                }
        }
  
  again:
        while (length) {
 -              u32 len = sge->length;
 +              u32 len = rvt_get_sge_length(sge, length);
  
 -              if (len > length)
 -                      len = length;
 -              if (len > sge->sge_length)
 -                      len = sge->sge_length;
                WARN_ON_ONCE(len == 0);
                if (unlikely(in_last)) {
                        /* enforce byte transfer ordering */
                } else {
                        memcpy(sge->vaddr, data, len);
                }
 -              sge->vaddr += len;
 -              sge->length -= len;
 -              sge->sge_length -= len;
 -              if (sge->sge_length == 0) {
 -                      if (release)
 -                              rvt_put_mr(sge->mr);
 -                      if (--ss->num_sge)
 -                              *sge = *ss->sg_list++;
 -              } else if (sge->length == 0 && sge->mr->lkey) {
 -                      if (++sge->n >= RVT_SEGSZ) {
 -                              if (++sge->m >= sge->mr->mapsz)
 -                                      break;
 -                              sge->n = 0;
 -                      }
 -                      sge->vaddr =
 -                              sge->mr->map[sge->m]->segs[sge->n].vaddr;
 -                      sge->length =
 -                              sge->mr->map[sge->m]->segs[sge->n].length;
 -              }
 +              rvt_update_sge(ss, len, release);
                data += len;
                length -= len;
        }
  
        if (copy_last) {
 -              copy_last = 0;
 -              in_last = 1;
 +              copy_last = false;
 +              in_last = true;
                length = 8;
                goto again;
        }
  }
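
The hfi1 hunk above folds its open-coded SGE bookkeeping into the rdmavt helpers rvt_get_sge_length() and rvt_update_sge(). Judging only from the deleted lines, the length helper is a three-way clamp; a standalone sketch of that computation, offered as an assumption derived from the removed code rather than rdmavt's actual implementation:

#include <stdint.h>
#include <stdio.h>

struct demo_sge {
        uint32_t length;        /* bytes left in the current segment */
        uint32_t sge_length;    /* bytes left in the whole SGE */
};

/* Never copy more than requested, the current segment, or the SGE total. */
static uint32_t demo_get_sge_length(const struct demo_sge *sge, uint32_t want)
{
        uint32_t len = want;

        if (len > sge->length)
                len = sge->length;
        if (len > sge->sge_length)
                len = sge->sge_length;
        return len;
}

int main(void)
{
        struct demo_sge sge = { .length = 64, .sge_length = 4096 };

        printf("%u\n", (unsigned)demo_get_sge_length(&sge, 256));  /* 64 */
        return 0;
}
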
  
 -/**
 - * hfi1_skip_sge - skip over SGE memory
 - * @ss: the SGE state
 - * @length: the number of bytes to skip
 - */
 -void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
 -{
 -      struct rvt_sge *sge = &ss->sge;
 -
 -      while (length) {
 -              u32 len = sge->length;
 -
 -              if (len > length)
 -                      len = length;
 -              if (len > sge->sge_length)
 -                      len = sge->sge_length;
 -              WARN_ON_ONCE(len == 0);
 -              sge->vaddr += len;
 -              sge->length -= len;
 -              sge->sge_length -= len;
 -              if (sge->sge_length == 0) {
 -                      if (release)
 -                              rvt_put_mr(sge->mr);
 -                      if (--ss->num_sge)
 -                              *sge = *ss->sg_list++;
 -              } else if (sge->length == 0 && sge->mr->lkey) {
 -                      if (++sge->n >= RVT_SEGSZ) {
 -                              if (++sge->m >= sge->mr->mapsz)
 -                                      break;
 -                              sge->n = 0;
 -                      }
 -                      sge->vaddr =
 -                              sge->mr->map[sge->m]->segs[sge->n].vaddr;
 -                      sge->length =
 -                              sge->mr->map[sge->m]->segs[sge->n].length;
 -              }
 -              length -= len;
 -      }
 -}
 -
  /*
   * Make sure the QP is ready and able to accept the given opcode.
   */
@@@ -515,7 -576,7 +515,7 @@@ void hfi1_ib_rcv(struct hfi1_packet *pa
        struct ib_header *hdr = packet->hdr;
        u32 tlen = packet->tlen;
        struct hfi1_pportdata *ppd = rcd->ppd;
 -      struct hfi1_ibport *ibp = &ppd->ibport_data;
 +      struct hfi1_ibport *ibp = rcd_to_iport(rcd);
        struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
        opcode_handler packet_handler;
        unsigned long flags;
@@@ -628,6 -689,27 +628,6 @@@ static void mem_timer(unsigned long dat
                hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
  }
  
 -void update_sge(struct rvt_sge_state *ss, u32 length)
 -{
 -      struct rvt_sge *sge = &ss->sge;
 -
 -      sge->vaddr += length;
 -      sge->length -= length;
 -      sge->sge_length -= length;
 -      if (sge->sge_length == 0) {
 -              if (--ss->num_sge)
 -                      *sge = *ss->sg_list++;
 -      } else if (sge->length == 0 && sge->mr->lkey) {
 -              if (++sge->n >= RVT_SEGSZ) {
 -                      if (++sge->m >= sge->mr->mapsz)
 -                              return;
 -                      sge->n = 0;
 -              }
 -              sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
 -              sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
 -      }
 -}
 -
  /*
   * This is called with progress side lock held.
   */
@@@ -716,7 -798,7 +716,7 @@@ static noinline int build_verbs_ulp_pay
                        len);
                if (ret)
                        goto bail_txadd;
 -              update_sge(ss, len);
 +              rvt_update_sge(ss, len, false);
                length -= len;
        }
        return ret;
@@@ -991,7 -1073,7 +991,7 @@@ int hfi1_verbs_send_pio(struct rvt_qp *
  
                                if (slen > len)
                                        slen = len;
 -                              update_sge(ss, slen);
 +                              rvt_update_sge(ss, slen, false);
                                seg_pio_copy_mid(pbuf, addr, slen);
                                len -= slen;
                        }
@@@ -1302,7 -1384,6 +1302,7 @@@ static int query_port(struct rvt_dev_in
        struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
        u16 lid = ppd->lid;
  
 +      /* props being zeroed by the caller, avoid zeroing it here */
        props->lid = lid ? lid : 0;
        props->lmc = ppd->lmc;
        /* OPA logical states match IB logical states */
@@@ -1537,7 -1618,7 +1537,7 @@@ static int cntr_names_initialized
   * external strings.
   */
  static int init_cntr_names(const char *names_in,
 -                         const int names_len,
 +                         const size_t names_len,
                           int num_extra_names,
                           int *num_cntrs,
                           const char ***cntr_names)
@@@ -1703,7 -1784,7 +1703,7 @@@ int hfi1_register_ib_device(struct hfi1
        strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
        ibdev->owner = THIS_MODULE;
        ibdev->phys_port_cnt = dd->num_pports;
-       ibdev->dma_device = &dd->pcidev->dev;
+       ibdev->dev.parent = &dd->pcidev->dev;
        ibdev->modify_device = modify_device;
        ibdev->alloc_hw_stats = alloc_hw_stats;
        ibdev->get_hw_stats = get_hw_stats;
        dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu;
        dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
        dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
 +      dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
        dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
  
        /* completion queue */
@@@ -1830,7 -1910,7 +1830,7 @@@ void hfi1_unregister_ib_device(struct h
  
  void hfi1_cnp_rcv(struct hfi1_packet *packet)
  {
 -      struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
 +      struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct ib_header *hdr = packet->hdr;
        struct rvt_qp *qp = packet->qp;
index 6843409fba298abf1d593d0990c49054333cf43d,0669377491441c98b360641c379b5bb65e057f79..c3b41f95e70a5f1c39e89d91e48653fc05e2b20a
@@@ -32,7 -32,6 +32,7 @@@
   */
  #include <linux/acpi.h>
  #include <linux/of_platform.h>
 +#include <linux/module.h>
  #include <rdma/ib_addr.h>
  #include <rdma/ib_smi.h>
  #include <rdma/ib_user_verbs.h>
@@@ -250,7 -249,7 +250,7 @@@ static int hns_roce_query_port(struct i
        assert(port_num > 0);
        port = port_num - 1;
  
 -      memset(props, 0, sizeof(*props));
 +      /* props being zeroed by the caller, avoid zeroing it here */
  
        props->max_mtu = hr_dev->caps.max_mtu;
        props->gid_tbl_len = hr_dev->caps.gid_table_len[port];
@@@ -401,15 -400,14 +401,15 @@@ static int hns_roce_port_immutable(stru
        struct ib_port_attr attr;
        int ret;
  
 -      ret = hns_roce_query_port(ib_dev, port_num, &attr);
 +      immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
 +
 +      ret = ib_query_port(ib_dev, port_num, &attr);
        if (ret)
                return ret;
  
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
  
 -      immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
  
        return 0;
@@@ -439,7 -437,7 +439,7 @@@ static int hns_roce_register_device(str
  
        ib_dev->owner                   = THIS_MODULE;
        ib_dev->node_type               = RDMA_NODE_IB_CA;
-       ib_dev->dma_device              = dev;
+       ib_dev->dev.parent              = dev;
  
        ib_dev->phys_port_cnt           = hr_dev->caps.num_ports;
        ib_dev->local_dma_lkey          = hr_dev->caps.reserved_lkey;
index 5f695bf232a8f0d7bf332f65441db6a195d9092e,428088cdc32fa8b13bac9b7eef42b97aa24769b2..9b2849979756ba05ebf2c1f25073c1740d4b1520
@@@ -97,9 -97,19 +97,9 @@@ static int i40iw_query_port(struct ib_d
        struct i40iw_device *iwdev = to_iwdev(ibdev);
        struct net_device *netdev = iwdev->netdev;
  
 -      memset(props, 0, sizeof(*props));
 -
 +      /* props being zeroed by the caller, avoid zeroing it here */
        props->max_mtu = IB_MTU_4096;
 -      if (netdev->mtu >= 4096)
 -              props->active_mtu = IB_MTU_4096;
 -      else if (netdev->mtu >= 2048)
 -              props->active_mtu = IB_MTU_2048;
 -      else if (netdev->mtu >= 1024)
 -              props->active_mtu = IB_MTU_1024;
 -      else if (netdev->mtu >= 512)
 -              props->active_mtu = IB_MTU_512;
 -      else
 -              props->active_mtu = IB_MTU_256;
 +      props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
  
        props->lid = 1;
        if (netif_carrier_ok(iwdev->netdev))
@@@ -2496,15 -2506,14 +2496,15 @@@ static int i40iw_port_immutable(struct 
        struct ib_port_attr attr;
        int err;
  
 -      err = i40iw_query_port(ibdev, port_num, &attr);
 +      immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
 +
 +      err = ib_query_port(ibdev, port_num, &attr);
  
        if (err)
                return err;
  
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 -      immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
  
        return 0;
  }
@@@ -2758,7 -2767,6 +2758,6 @@@ static struct i40iw_ib_device *i40iw_in
            (1ull << IB_USER_VERBS_CMD_POST_SEND);
        iwibdev->ibdev.phys_port_cnt = 1;
        iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
-       iwibdev->ibdev.dma_device = &pcidev->dev;
        iwibdev->ibdev.dev.parent = &pcidev->dev;
        iwibdev->ibdev.query_port = i40iw_query_port;
        iwibdev->ibdev.modify_port = i40iw_modify_port;
index 211cbbe9ccd1e7a97540df1fed2d838f03139e90,369d254bafebe74ccfa4ba22a742106ce74263ed..88608906ce2503987e4c100506d698a3da89a65c
@@@ -678,7 -678,7 +678,7 @@@ static u8 state_to_phys_state(enum ib_p
  }
  
  static int eth_link_query_port(struct ib_device *ibdev, u8 port,
 -                             struct ib_port_attr *props, int netw_view)
 +                             struct ib_port_attr *props)
  {
  
        struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@@ -741,11 -741,11 +741,11 @@@ int __mlx4_ib_query_port(struct ib_devi
  {
        int err;
  
 -      memset(props, 0, sizeof *props);
 +      /* props being zeroed by the caller, avoid zeroing it here */
  
        err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
                ib_link_query_port(ibdev, port, props, netw_view) :
 -                              eth_link_query_port(ibdev, port, props, netw_view);
 +                              eth_link_query_port(ibdev, port, props);
  
        return err;
  }
@@@ -1014,7 -1014,7 +1014,7 @@@ static int mlx4_ib_modify_port(struct i
  
        mutex_lock(&mdev->cap_mask_mutex);
  
 -      err = mlx4_ib_query_port(ibdev, port, &attr);
 +      err = ib_query_port(ibdev, port, &attr);
        if (err)
                goto out;
  
@@@ -2537,27 -2537,24 +2537,27 @@@ static int mlx4_port_immutable(struct i
        struct mlx4_ib_dev *mdev = to_mdev(ibdev);
        int err;
  
 -      err = mlx4_ib_query_port(ibdev, port_num, &attr);
 -      if (err)
 -              return err;
 -
 -      immutable->pkey_tbl_len = attr.pkey_tbl_len;
 -      immutable->gid_tbl_len = attr.gid_tbl_len;
 -
        if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
                immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
 +              immutable->max_mad_size = IB_MGMT_MAD_SIZE;
        } else {
                if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
                        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
                if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
                        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
                                RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
 +              immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
 +              if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
 +                  RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
 +                      immutable->max_mad_size = IB_MGMT_MAD_SIZE;
        }
  
 -      immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 +      err = ib_query_port(ibdev, port_num, &attr);
 +      if (err)
 +              return err;
 +
 +      immutable->pkey_tbl_len = attr.pkey_tbl_len;
 +      immutable->gid_tbl_len = attr.gid_tbl_len;
  
        return 0;
  }
@@@ -2628,7 -2625,7 +2628,7 @@@ static void *mlx4_ib_add(struct mlx4_de
        ibdev->ib_dev.phys_port_cnt     = mlx4_is_bonded(dev) ?
                                                1 : ibdev->num_ports;
        ibdev->ib_dev.num_comp_vectors  = dev->caps.num_comp_vectors;
-       ibdev->ib_dev.dma_device        = &dev->persist->pdev->dev;
+       ibdev->ib_dev.dev.parent        = &dev->persist->pdev->dev;
        ibdev->ib_dev.get_netdev        = mlx4_ib_get_netdev;
        ibdev->ib_dev.add_gid           = mlx4_ib_add_gid;
        ibdev->ib_dev.del_gid           = mlx4_ib_del_gid;
index 6a8498c052a5c3a164ab47a1525f88835634ab17,da3bf6c081deaec497ec4a0faaebf57e08797b1b..5b3355268725b8fd07917d996a612db4584ff7c1
@@@ -53,7 -53,6 +53,7 @@@
  #include <linux/in.h>
  #include <linux/etherdevice.h>
  #include <linux/mlx5/fs.h>
 +#include <linux/mlx5/vport.h>
  #include "mlx5_ib.h"
  
  #define DRIVER_NAME "mlx5_ib"
@@@ -65,6 -64,10 +65,6 @@@ MODULE_DESCRIPTION("Mellanox Connect-I
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_VERSION(DRIVER_VERSION);
  
 -static int deprecated_prof_sel = 2;
 -module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
 -MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
 -
  static char mlx5_version[] =
        DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
        DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
@@@ -171,7 -174,7 +171,7 @@@ static int mlx5_query_port_roce(struct 
        enum ib_mtu ndev_ib_mtu;
        u16 qkey_viol_cntr;
  
 -      memset(props, 0, sizeof(*props));
 +      /* props being zeroed by the caller, avoid zeroing it here */
  
        props->port_cap_flags  |= IB_PORT_CM_SUP;
        props->port_cap_flags  |= IB_PORT_IP_BASED_GIDS;
@@@ -322,27 -325,6 +322,27 @@@ __be16 mlx5_get_roce_udp_sport(struct m
        return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
  }
  
 +int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
 +                         int index, enum ib_gid_type *gid_type)
 +{
 +      struct ib_gid_attr attr;
 +      union ib_gid gid;
 +      int ret;
 +
 +      ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
 +      if (ret)
 +              return ret;
 +
 +      if (!attr.ndev)
 +              return -ENODEV;
 +
 +      dev_put(attr.ndev);
 +
 +      *gid_type = attr.gid_type;
 +
 +      return 0;
 +}
 +
  static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
  {
        if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
@@@ -582,15 -564,8 +582,15 @@@ static int mlx5_ib_query_device(struct 
                props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
  
        if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
 -              if (MLX5_CAP_ETH(mdev, csum_cap))
 +              if (MLX5_CAP_ETH(mdev, csum_cap)) {
 +                      /* Legacy bit to support old userspace libraries */
                        props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
 +                      props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
 +              }
 +
 +              if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
 +                      props->raw_packet_caps |=
 +                              IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
  
                if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
                        max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
        }
  
        if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
 -          MLX5_CAP_ETH(dev->mdev, scatter_fcs))
 +          MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
 +              /* Legacy bit to support old userspace libraries */
                props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
 +              props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
 +      }
  
        if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
                props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
                        1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
        }
  
 -      if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
 -                      uhw->outlen)) {
 -              resp.mlx5_ib_support_multi_pkt_send_wqes =
 -                      MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
 -              resp.response_length +=
 -                      sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
 -      }
 -
 -      if (field_avail(typeof(resp), reserved, uhw->outlen))
 -              resp.response_length += sizeof(resp.reserved);
 -
        if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
                resp.cqe_comp_caps.max_num =
                        MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
                resp.response_length += sizeof(resp.packet_pacing_caps);
        }
  
 +      if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
 +                      uhw->outlen)) {
 +              resp.mlx5_ib_support_multi_pkt_send_wqes =
 +                      MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
 +              resp.response_length +=
 +                      sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
 +      }
 +
 +      if (field_avail(typeof(resp), reserved, uhw->outlen))
 +              resp.response_length += sizeof(resp.reserved);
 +
        if (uhw->outlen) {
                err = ib_copy_to_udata(uhw, &resp, resp.response_length);
  
@@@ -858,7 -830,7 +858,7 @@@ static int mlx5_query_hca_port(struct i
                goto out;
        }
  
 -      memset(props, 0, sizeof(*props));
 +      /* props being zeroed by the caller, avoid zeroing it here */
  
        err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
        if (err)
@@@ -996,31 -968,6 +996,31 @@@ static int mlx5_ib_modify_device(struc
        return err;
  }
  
 +static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
 +                              u32 value)
 +{
 +      struct mlx5_hca_vport_context ctx = {};
 +      int err;
 +
 +      err = mlx5_query_hca_vport_context(dev->mdev, 0,
 +                                         port_num, 0, &ctx);
 +      if (err)
 +              return err;
 +
 +      if (~ctx.cap_mask1_perm & mask) {
 +              mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
 +                           mask, ctx.cap_mask1_perm);
 +              return -EINVAL;
 +      }
 +
 +      ctx.cap_mask1 = value;
 +      ctx.cap_mask1_perm = mask;
 +      err = mlx5_core_modify_hca_vport_context(dev->mdev, 0,
 +                                               port_num, 0, &ctx);
 +
 +      return err;
 +}
 +
  static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
                               struct ib_port_modify *props)
  {
        struct ib_port_attr attr;
        u32 tmp;
        int err;
 +      u32 change_mask;
 +      u32 value;
 +      bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
 +                    IB_LINK_LAYER_INFINIBAND);
 +
 +      if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
 +              change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
 +              value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
 +              return set_port_caps_atomic(dev, port, change_mask, value);
 +      }
  
        mutex_lock(&dev->cap_mask_mutex);
  
 -      err = mlx5_ib_query_port(ibdev, port, &attr);
 +      err = ib_query_port(ibdev, port, &attr);
        if (err)
                goto out;
  
@@@ -1055,86 -992,6 +1055,86 @@@ out
        return err;
  }
  
 +static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
 +{
 +      mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n",
 +                  caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
 +}
 +
 +static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
 +                           struct mlx5_ib_alloc_ucontext_req_v2 *req,
 +                           u32 *num_sys_pages)
 +{
 +      int uars_per_sys_page;
 +      int bfregs_per_sys_page;
 +      int ref_bfregs = req->total_num_bfregs;
 +
 +      if (req->total_num_bfregs == 0)
 +              return -EINVAL;
 +
 +      BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE);
 +      BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE);
 +
 +      if (req->total_num_bfregs > MLX5_MAX_BFREGS)
 +              return -ENOMEM;
 +
 +      uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
 +      bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
 +      req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
 +      *num_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
 +
 +      if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
 +              return -EINVAL;
 +
 +      mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, using %d sys pages\n",
 +                  MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
 +                  lib_uar_4k ? "yes" : "no", ref_bfregs,
 +                  req->total_num_bfregs, *num_sys_pages);
 +
 +      return 0;
 +}
 +
 +static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
 +{
 +      struct mlx5_bfreg_info *bfregi;
 +      int err;
 +      int i;
 +
 +      bfregi = &context->bfregi;
 +      for (i = 0; i < bfregi->num_sys_pages; i++) {
 +              err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
 +              if (err)
 +                      goto error;
 +
 +              mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]);
 +      }
 +      return 0;
 +
 +error:
 +      for (--i; i >= 0; i--)
 +              if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
 +                      mlx5_ib_warn(dev, "failed to free uar %d\n", i);
 +
 +      return err;
 +}
 +
 +static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
 +{
 +      struct mlx5_bfreg_info *bfregi;
 +      int err;
 +      int i;
 +
 +      bfregi = &context->bfregi;
 +      for (i = 0; i < bfregi->num_sys_pages; i++) {
 +              err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
 +              if (err) {
 +                      mlx5_ib_warn(dev, "failed to free uar %d\n", i);
 +                      return err;
 +              }
 +      }
 +      return 0;
 +}
 +
  static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
                                                  struct ib_udata *udata)
  {
        struct mlx5_ib_alloc_ucontext_req_v2 req = {};
        struct mlx5_ib_alloc_ucontext_resp resp = {};
        struct mlx5_ib_ucontext *context;
 -      struct mlx5_uuar_info *uuari;
 -      struct mlx5_uar *uars;
 -      int gross_uuars;
 -      int num_uars;
 +      struct mlx5_bfreg_info *bfregi;
        int ver;
 -      int uuarn;
        int err;
 -      int i;
        size_t reqlen;
        size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
                                     max_cqe_version);
 +      bool lib_uar_4k;
  
        if (!dev->ib_active)
                return ERR_PTR(-EAGAIN);
        if (req.flags)
                return ERR_PTR(-EINVAL);
  
 -      if (req.total_num_uuars > MLX5_MAX_UUARS)
 -              return ERR_PTR(-ENOMEM);
 -
 -      if (req.total_num_uuars == 0)
 -              return ERR_PTR(-EINVAL);
 -
        if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
                return ERR_PTR(-EOPNOTSUPP);
  
 -      if (reqlen > sizeof(req) &&
 -          !ib_is_udata_cleared(udata, sizeof(req),
 -                               reqlen - sizeof(req)))
 -              return ERR_PTR(-EOPNOTSUPP);
 -
 -      req.total_num_uuars = ALIGN(req.total_num_uuars,
 -                                  MLX5_NON_FP_BF_REGS_PER_PAGE);
 -      if (req.num_low_latency_uuars > req.total_num_uuars - 1)
 +      req.total_num_bfregs = ALIGN(req.total_num_bfregs,
 +                                  MLX5_NON_FP_BFREGS_PER_UAR);
 +      if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
                return ERR_PTR(-EINVAL);
  
 -      num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
 -      gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
        resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
        if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
                resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
        resp.cqe_version = min_t(__u8,
                                 (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
                                 req.max_cqe_version);
 +      resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
 +                              MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
 +      resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
 +                                      MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
        resp.response_length = min(offsetof(typeof(resp), response_length) +
                                   sizeof(resp.response_length), udata->outlen);
  
        if (!context)
                return ERR_PTR(-ENOMEM);
  
 -      uuari = &context->uuari;
 -      mutex_init(&uuari->lock);
 -      uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
 -      if (!uars) {
 -              err = -ENOMEM;
 +      lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
 +      bfregi = &context->bfregi;
 +
 +      /* updates req->total_num_bfregs */
 +      err = calc_total_bfregs(dev, lib_uar_4k, &req, &bfregi->num_sys_pages);
 +      if (err)
                goto out_ctx;
 -      }
  
 -      uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
 -                              sizeof(*uuari->bitmap),
 +      mutex_init(&bfregi->lock);
 +      bfregi->lib_uar_4k = lib_uar_4k;
 +      bfregi->count = kcalloc(req.total_num_bfregs, sizeof(*bfregi->count),
                                GFP_KERNEL);
 -      if (!uuari->bitmap) {
 +      if (!bfregi->count) {
                err = -ENOMEM;
 -              goto out_uar_ctx;
 -      }
 -      /*
 -       * clear all fast path uuars
 -       */
 -      for (i = 0; i < gross_uuars; i++) {
 -              uuarn = i & 3;
 -              if (uuarn == 2 || uuarn == 3)
 -                      set_bit(i, uuari->bitmap);
 +              goto out_ctx;
        }
  
 -      uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
 -      if (!uuari->count) {
 +      bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
 +                                  sizeof(*bfregi->sys_pages),
 +                                  GFP_KERNEL);
 +      if (!bfregi->sys_pages) {
                err = -ENOMEM;
 -              goto out_bitmap;
 +              goto out_count;
        }
  
 -      for (i = 0; i < num_uars; i++) {
 -              err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
 -              if (err)
 -                      goto out_count;
 -      }
 +      err = allocate_uars(dev, context);
 +      if (err)
 +              goto out_sys_pages;
  
  #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
  #endif
  
 +      context->upd_xlt_page = __get_free_page(GFP_KERNEL);
 +      if (!context->upd_xlt_page) {
 +              err = -ENOMEM;
 +              goto out_uars;
 +      }
 +      mutex_init(&context->upd_xlt_page_mutex);
 +
        if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
                err = mlx5_core_alloc_transport_domain(dev->mdev,
                                                       &context->tdn);
                if (err)
 -                      goto out_uars;
 +                      goto out_page;
        }
  
        INIT_LIST_HEAD(&context->vma_private_list);
        INIT_LIST_HEAD(&context->db_page_list);
        mutex_init(&context->db_page_mutex);
  
 -      resp.tot_uuars = req.total_num_uuars;
 +      resp.tot_bfregs = req.total_num_bfregs;
        resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
  
        if (field_avail(typeof(resp), cqe_version, udata->outlen))
                resp.response_length += sizeof(resp.cmds_supp_uhw);
        }
  
 +      if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) {
 +              if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
 +                      mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
 +                      resp.eth_min_inline++;
 +              }
 +              resp.response_length += sizeof(resp.eth_min_inline);
 +      }
 +
        /*
         * We don't want to expose information from the PCI bar that is located
         * after 4096 bytes, so if the arch only supports larger pages, let's
         * pretend we don't support reading the HCA's core clock. This is also
         * forced by mmap function.
         */
 -      if (PAGE_SIZE <= 4096 &&
 -          field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
 -              resp.comp_mask |=
 -                      MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
 -              resp.hca_core_clock_offset =
 -                      offsetof(struct mlx5_init_seg, internal_timer_h) %
 -                      PAGE_SIZE;
 +      if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
 +              if (PAGE_SIZE <= 4096) {
 +                      resp.comp_mask |=
 +                              MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
 +                      resp.hca_core_clock_offset =
 +                              offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
 +              }
                resp.response_length += sizeof(resp.hca_core_clock_offset) +
                                        sizeof(resp.reserved2);
        }
  
 +      if (field_avail(typeof(resp), log_uar_size, udata->outlen))
 +              resp.response_length += sizeof(resp.log_uar_size);
 +
 +      if (field_avail(typeof(resp), num_uars_per_page, udata->outlen))
 +              resp.response_length += sizeof(resp.num_uars_per_page);
 +
        err = ib_copy_to_udata(udata, &resp, resp.response_length);
        if (err)
                goto out_td;
  
 -      uuari->ver = ver;
 -      uuari->num_low_latency_uuars = req.num_low_latency_uuars;
 -      uuari->uars = uars;
 -      uuari->num_uars = num_uars;
 +      bfregi->ver = ver;
 +      bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
        context->cqe_version = resp.cqe_version;
 +      context->lib_caps = req.lib_caps;
 +      print_lib_caps(dev, context->lib_caps);
  
        return &context->ibucontext;
  
@@@ -1312,21 -1168,20 +1312,21 @@@ out_td
        if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
                mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
  
 +out_page:
 +      free_page(context->upd_xlt_page);
 +
  out_uars:
 -      for (i--; i >= 0; i--)
 -              mlx5_cmd_free_uar(dev->mdev, uars[i].index);
 -out_count:
 -      kfree(uuari->count);
 +      deallocate_uars(dev, context);
  
 -out_bitmap:
 -      kfree(uuari->bitmap);
 +out_sys_pages:
 +      kfree(bfregi->sys_pages);
  
 -out_uar_ctx:
 -      kfree(uars);
 +out_count:
 +      kfree(bfregi->count);
  
  out_ctx:
        kfree(context);
 +
        return ERR_PTR(err);
  }
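/*
 * Editor's note: the resp.response_length / field_avail() handling in
 * mlx5_ib_alloc_ucontext() above lets the kernel copy back only as many
 * response bytes as the (possibly older) user library asked for. A
 * simplified stand-alone model of that check, not the real uapi structs:
 */
#include <stddef.h>
#include <stdio.h>

struct resp_v2 {
        unsigned int qp_tab_size;
        unsigned int response_length;
        unsigned int cqe_version;       /* field added in a later ABI revision */
};

#define field_avail(type, fld, outlen) \
        ((outlen) >= offsetof(type, fld) + sizeof(((type *)0)->fld))

int main(void)
{
        size_t old_outlen = offsetof(struct resp_v2, cqe_version); /* old library */
        size_t new_outlen = sizeof(struct resp_v2);                /* new library */

        printf("old userspace gets cqe_version: %d\n",
               field_avail(struct resp_v2, cqe_version, old_outlen));
        printf("new userspace gets cqe_version: %d\n",
               field_avail(struct resp_v2, cqe_version, new_outlen));
        return 0;
}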
  
@@@ -1334,31 -1189,28 +1334,31 @@@ static int mlx5_ib_dealloc_ucontext(str
  {
        struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
        struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
 -      struct mlx5_uuar_info *uuari = &context->uuari;
 -      int i;
 +      struct mlx5_bfreg_info *bfregi;
  
 +      bfregi = &context->bfregi;
        if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
                mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
  
 -      for (i = 0; i < uuari->num_uars; i++) {
 -              if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
 -                      mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
 -      }
 -
 -      kfree(uuari->count);
 -      kfree(uuari->bitmap);
 -      kfree(uuari->uars);
 +      free_page(context->upd_xlt_page);
 +      deallocate_uars(dev, context);
 +      kfree(bfregi->sys_pages);
 +      kfree(bfregi->count);
        kfree(context);
  
        return 0;
  }
  
 -static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
 +static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
 +                               struct mlx5_bfreg_info *bfregi,
 +                               int idx)
  {
 -      return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
 +      int fw_uars_per_page;
 +
 +      fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
 +
 +      return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) +
 +                      bfregi->sys_pages[idx] / fw_uars_per_page;
  }
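/*
 * Editor's note: illustration of uar_index2pfn() above with invented numbers.
 * When the uar_4k capability is set, fw_uars_per_page UARs share one CPU
 * page, so the allocated UAR index is scaled down before being added to the
 * page frame number at which BAR 0 starts.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t bar_start_pfn = 0xf0000; /* pci_resource_start(pdev, 0) >> PAGE_SHIFT */
        int fw_uars_per_page = 4;         /* MLX5_UARS_IN_PAGE when uar_4k is set */
        int uar_index = 10;               /* bfregi->sys_pages[idx] */

        uint64_t pfn = bar_start_pfn + uar_index / fw_uars_per_page;

        printf("uar index %d -> pfn 0x%llx\n", uar_index, (unsigned long long)pfn);
        return 0;
}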
  
  static int get_command(unsigned long offset)
@@@ -1513,23 -1365,11 +1513,23 @@@ static int uar_mmap(struct mlx5_ib_dev 
                    struct vm_area_struct *vma,
                    struct mlx5_ib_ucontext *context)
  {
 -      struct mlx5_uuar_info *uuari = &context->uuari;
 +      struct mlx5_bfreg_info *bfregi = &context->bfregi;
        int err;
        unsigned long idx;
        phys_addr_t pfn, pa;
        pgprot_t prot;
 +      int uars_per_page;
 +
 +      if (vma->vm_end - vma->vm_start != PAGE_SIZE)
 +              return -EINVAL;
 +
 +      uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k);
 +      idx = get_index(vma->vm_pgoff);
 +      if (idx % uars_per_page ||
 +          idx * uars_per_page >= bfregi->num_sys_pages) {
 +              mlx5_ib_warn(dev, "invalid uar index %lu\n", idx);
 +              return -EINVAL;
 +      }
  
        switch (cmd) {
        case MLX5_IB_MMAP_WC_PAGE:
                return -EINVAL;
        }
  
 -      if (vma->vm_end - vma->vm_start != PAGE_SIZE)
 -              return -EINVAL;
 -
 -      idx = get_index(vma->vm_pgoff);
 -      if (idx >= uuari->num_uars)
 -              return -EINVAL;
 -
 -      pfn = uar_index2pfn(dev, uuari->uars[idx].index);
 +      pfn = uar_index2pfn(dev, bfregi, idx);
        mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
  
        vma->vm_page_prot = prot;
@@@ -1723,7 -1570,6 +1723,7 @@@ static void set_tos(void *outer_c, voi
  #define LAST_IPV6_FIELD traffic_class
  #define LAST_TCP_UDP_FIELD src_port
  #define LAST_TUNNEL_FIELD tunnel_id
 +#define LAST_FLOW_TAG_FIELD tag_id
  
  /* Field is the last supported field */
  #define FIELDS_NOT_SUPPORTED(filter, field)\
                   sizeof(filter.field))
  
  static int parse_flow_attr(u32 *match_c, u32 *match_v,
 -                         const union ib_flow_spec *ib_spec)
 +                         const union ib_flow_spec *ib_spec, u32 *tag_id)
  {
        void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
                                           misc_parameters);
        switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
        case IB_FLOW_SPEC_ETH:
                if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
 -                      return -ENOTSUPP;
 +                      return -EOPNOTSUPP;
  
                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                             dmac_47_16),
  
                if (ib_spec->eth.mask.vlan_tag) {
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 -                               vlan_tag, 1);
 +                               cvlan_tag, 1);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 -                               vlan_tag, 1);
 +                               cvlan_tag, 1);
  
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
                break;
        case IB_FLOW_SPEC_IPV4:
                if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
 -                      return -ENOTSUPP;
 +                      return -EOPNOTSUPP;
  
                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                         ethertype, 0xffff);
                break;
        case IB_FLOW_SPEC_IPV6:
                if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
 -                      return -ENOTSUPP;
 +                      return -EOPNOTSUPP;
  
                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                         ethertype, 0xffff);
        case IB_FLOW_SPEC_TCP:
                if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
                                         LAST_TCP_UDP_FIELD))
 -                      return -ENOTSUPP;
 +                      return -EOPNOTSUPP;
  
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                         0xff);
        case IB_FLOW_SPEC_UDP:
                if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
                                         LAST_TCP_UDP_FIELD))
 -                      return -ENOTSUPP;
 +                      return -EOPNOTSUPP;
  
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                         0xff);
        case IB_FLOW_SPEC_VXLAN_TUNNEL:
                if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
                                         LAST_TUNNEL_FIELD))
 -                      return -ENOTSUPP;
 +                      return -EOPNOTSUPP;
  
                MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
                         ntohl(ib_spec->tunnel.mask.tunnel_id));
                MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
                         ntohl(ib_spec->tunnel.val.tunnel_id));
                break;
 +      case IB_FLOW_SPEC_ACTION_TAG:
 +              if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
 +                                       LAST_FLOW_TAG_FIELD))
 +                      return -EOPNOTSUPP;
 +              if (ib_spec->flow_tag.tag_id >= BIT(24))
 +                      return -EINVAL;
 +
 +              *tag_id = ib_spec->flow_tag.tag_id;
 +              break;
        default:
                return -EINVAL;
        }
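/*
 * Editor's note: the IB_FLOW_SPEC_ACTION_TAG case in parse_flow_attr() above
 * only accepts tags that fit in 24 bits. A stand-alone check of that bound,
 * with BIT() redefined here for illustration:
 */
#include <stdint.h>
#include <stdio.h>

#define BIT(n)  (1U << (n))

int main(void)
{
        uint32_t tags[] = { 0x1234, 0xffffff, 0x1000000 };

        for (int i = 0; i < 3; i++)
                printf("tag 0x%x: %s\n", (unsigned)tags[i],
                       tags[i] >= BIT(24) ? "rejected (-EINVAL)" : "accepted");
        return 0;
}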
@@@ -2118,7 -1955,6 +2118,7 @@@ static struct mlx5_ib_flow_handler *cre
        struct mlx5_flow_spec *spec;
        const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
        unsigned int spec_index;
 +      u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
        int err = 0;
  
        if (!is_valid_attr(flow_attr))
  
        for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
                err = parse_flow_attr(spec->match_criteria,
 -                                    spec->match_value, ib_flow);
 +                                    spec->match_value, ib_flow, &flow_tag);
                if (err < 0)
                        goto free;
  
        spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
        flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
                MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
 -      flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
 +
 +      if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
 +          (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
 +           flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
 +              mlx5_ib_warn(dev, "Flow tag %u and attribute type %x are not allowed in leftovers\n",
 +                           flow_tag, flow_attr->type);
 +              err = -EINVAL;
 +              goto free;
 +      }
 +      flow_act.flow_tag = flow_tag;
        handler->rule = mlx5_add_flow_rules(ft, spec,
                                            &flow_act,
                                            dst, 1);
@@@ -2624,35 -2451,6 +2624,35 @@@ static void mlx5_ib_event(struct mlx5_c
                ibdev->ib_active = false;
  }
  
 +static int set_has_smi_cap(struct mlx5_ib_dev *dev)
 +{
 +      struct mlx5_hca_vport_context vport_ctx;
 +      int err;
 +      int port;
 +
 +      for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
 +              dev->mdev->port_caps[port - 1].has_smi = false;
 +              if (MLX5_CAP_GEN(dev->mdev, port_type) ==
 +                  MLX5_CAP_PORT_TYPE_IB) {
 +                      if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
 +                              err = mlx5_query_hca_vport_context(dev->mdev, 0,
 +                                                                 port, 0,
 +                                                                 &vport_ctx);
 +                              if (err) {
 +                                      mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
 +                                                  port, err);
 +                                      return err;
 +                              }
 +                              dev->mdev->port_caps[port - 1].has_smi =
 +                                      vport_ctx.has_smi;
 +                      } else {
 +                              dev->mdev->port_caps[port - 1].has_smi = true;
 +                      }
 +              }
 +      }
 +      return 0;
 +}
 +
  static void get_ext_port_caps(struct mlx5_ib_dev *dev)
  {
        int port;
@@@ -2677,10 -2475,6 +2677,10 @@@ static int get_port_caps(struct mlx5_ib
        if (!dprops)
                goto out;
  
 +      err = set_has_smi_cap(dev);
 +      if (err)
 +              goto out;
 +
        err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
        if (err) {
                mlx5_ib_warn(dev, "query_device failed %d\n", err);
        }
  
        for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
 +              memset(pprops, 0, sizeof(*pprops));
                err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
                if (err) {
                        mlx5_ib_warn(dev, "query_port %d failed %d\n",
@@@ -2983,13 -2776,11 +2983,13 @@@ static u32 get_core_cap_flags(struct ib
        if (ll == IB_LINK_LAYER_INFINIBAND)
                return RDMA_CORE_PORT_IBA_IB;
  
 +      ret = RDMA_CORE_PORT_RAW_PACKET;
 +
        if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
 -              return 0;
 +              return ret;
  
        if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
 -              return 0;
 +              return ret;
  
        if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
                ret |= RDMA_CORE_PORT_IBA_ROCE;
@@@ -3008,9 -2799,7 +3008,9 @@@ static int mlx5_port_immutable(struct i
        enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
        int err;
  
 -      err = mlx5_ib_query_port(ibdev, port_num, &attr);
 +      immutable->core_cap_flags = get_core_cap_flags(ibdev);
 +
 +      err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
  
@@@ -3131,102 -2920,13 +3131,102 @@@ static void mlx5_disable_eth(struct mlx
                mlx5_nic_vport_disable_roce(dev->mdev);
  }
  
 +struct mlx5_ib_q_counter {
 +      const char *name;
 +      size_t offset;
 +};
 +
 +#define INIT_Q_COUNTER(_name)         \
 +      { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
 +
 +static const struct mlx5_ib_q_counter basic_q_cnts[] = {
 +      INIT_Q_COUNTER(rx_write_requests),
 +      INIT_Q_COUNTER(rx_read_requests),
 +      INIT_Q_COUNTER(rx_atomic_requests),
 +      INIT_Q_COUNTER(out_of_buffer),
 +};
 +
 +static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = {
 +      INIT_Q_COUNTER(out_of_sequence),
 +};
 +
 +static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
 +      INIT_Q_COUNTER(duplicate_request),
 +      INIT_Q_COUNTER(rnr_nak_retry_err),
 +      INIT_Q_COUNTER(packet_seq_err),
 +      INIT_Q_COUNTER(implied_nak_seq_err),
 +      INIT_Q_COUNTER(local_ack_timeout_err),
 +};
 +
  static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
  {
        unsigned int i;
  
 -      for (i = 0; i < dev->num_ports; i++)
 +      for (i = 0; i < dev->num_ports; i++) {
                mlx5_core_dealloc_q_counter(dev->mdev,
 -                                          dev->port[i].q_cnt_id);
 +                                          dev->port[i].q_cnts.set_id);
 +              kfree(dev->port[i].q_cnts.names);
 +              kfree(dev->port[i].q_cnts.offsets);
 +      }
 +}
 +
 +static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
 +                                    const char ***names,
 +                                    size_t **offsets,
 +                                    u32 *num)
 +{
 +      u32 num_counters;
 +
 +      num_counters = ARRAY_SIZE(basic_q_cnts);
 +
 +      if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
 +              num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
 +
 +      if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
 +              num_counters += ARRAY_SIZE(retrans_q_cnts);
 +
 +      *names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL);
 +      if (!*names)
 +              return -ENOMEM;
 +
 +      *offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL);
 +      if (!*offsets)
 +              goto err_names;
 +
 +      *num = num_counters;
 +
 +      return 0;
 +
 +err_names:
 +      kfree(*names);
 +      return -ENOMEM;
 +}
 +
 +static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
 +                                  const char **names,
 +                                  size_t *offsets)
 +{
 +      int i;
 +      int j = 0;
 +
 +      for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
 +              names[j] = basic_q_cnts[i].name;
 +              offsets[j] = basic_q_cnts[i].offset;
 +      }
 +
 +      if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
 +              for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
 +                      names[j] = out_of_seq_q_cnts[i].name;
 +                      offsets[j] = out_of_seq_q_cnts[i].offset;
 +              }
 +      }
 +
 +      if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
 +              for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
 +                      names[j] = retrans_q_cnts[i].name;
 +                      offsets[j] = retrans_q_cnts[i].offset;
 +              }
 +      }
  }
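/*
 * Editor's note: sketch of how the per-port counter name/offset arrays are
 * built by __mlx5_ib_alloc_q_counters() and mlx5_ib_fill_q_counters() above.
 * A boolean stands in for the MLX5_CAP_GEN() firmware capability check and
 * the counter groups here are invented.
 */
#include <stddef.h>
#include <stdio.h>

struct q_counter { const char *name; size_t offset; };

static const struct q_counter basic[] = {
        { "rx_write_requests", 0x00 }, { "out_of_buffer", 0x04 },
};
static const struct q_counter retrans[] = {
        { "duplicate_request", 0x08 }, { "packet_seq_err", 0x0c },
};

int main(void)
{
        int have_retrans = 1;   /* would be MLX5_CAP_GEN(mdev, retransmission_q_counters) */
        const char *names[8];
        size_t offsets[8];
        int n = 0;

        for (size_t i = 0; i < sizeof(basic) / sizeof(basic[0]); i++, n++) {
                names[n] = basic[i].name;
                offsets[n] = basic[i].offset;
        }
        if (have_retrans) {
                for (size_t i = 0; i < sizeof(retrans) / sizeof(retrans[0]); i++, n++) {
                        names[n] = retrans[i].name;
                        offsets[n] = retrans[i].offset;
                }
        }

        for (int i = 0; i < n; i++)
                printf("%-20s offset 0x%zx\n", names[i], offsets[i]);
        return 0;
}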
  
  static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
        int ret;
  
        for (i = 0; i < dev->num_ports; i++) {
 +              struct mlx5_ib_port *port = &dev->port[i];
 +
                ret = mlx5_core_alloc_q_counter(dev->mdev,
 -                                              &dev->port[i].q_cnt_id);
 +                                              &port->q_cnts.set_id);
                if (ret) {
                        mlx5_ib_warn(dev,
                                     "couldn't allocate queue counter for port %d, err %d\n",
                                     i + 1, ret);
                        goto dealloc_counters;
                }
 +
 +              ret = __mlx5_ib_alloc_q_counters(dev,
 +                                               &port->q_cnts.names,
 +                                               &port->q_cnts.offsets,
 +                                               &port->q_cnts.num_counters);
 +              if (ret)
 +                      goto dealloc_counters;
 +
 +              mlx5_ib_fill_q_counters(dev, port->q_cnts.names,
 +                                      port->q_cnts.offsets);
        }
  
        return 0;
  dealloc_counters:
        while (--i >= 0)
                mlx5_core_dealloc_q_counter(dev->mdev,
 -                                          dev->port[i].q_cnt_id);
 +                                          dev->port[i].q_cnts.set_id);
  
        return ret;
  }
  
 -static const char * const names[] = {
 -      "rx_write_requests",
 -      "rx_read_requests",
 -      "rx_atomic_requests",
 -      "out_of_buffer",
 -      "out_of_sequence",
 -      "duplicate_request",
 -      "rnr_nak_retry_err",
 -      "packet_seq_err",
 -      "implied_nak_seq_err",
 -      "local_ack_timeout_err",
 -};
 -
 -static const size_t stats_offsets[] = {
 -      MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
 -      MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
 -      MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
 -      MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
 -      MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
 -      MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
 -      MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
 -      MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
 -      MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
 -      MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
 -};
 -
  static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
                                                    u8 port_num)
  {
 -      BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));
 +      struct mlx5_ib_dev *dev = to_mdev(ibdev);
 +      struct mlx5_ib_port *port = &dev->port[port_num - 1];
  
        /* We support only per port stats */
        if (port_num == 0)
                return NULL;
  
 -      return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
 +      return rdma_alloc_hw_stats_struct(port->q_cnts.names,
 +                                        port->q_cnts.num_counters,
                                          RDMA_HW_STATS_DEFAULT_LIFESPAN);
  }
  
  static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
                                struct rdma_hw_stats *stats,
 -                              u8 port, int index)
 +                              u8 port_num, int index)
  {
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
 +      struct mlx5_ib_port *port = &dev->port[port_num - 1];
        int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
        void *out;
        __be32 val;
        int ret;
        int i;
  
 -      if (!port || !stats)
 +      if (!stats)
                return -ENOSYS;
  
        out = mlx5_vzalloc(outlen);
                return -ENOMEM;
  
        ret = mlx5_core_query_q_counter(dev->mdev,
 -                                      dev->port[port - 1].q_cnt_id, 0,
 +                                      port->q_cnts.set_id, 0,
                                        out, outlen);
        if (ret)
                goto free;
  
 -      for (i = 0; i < ARRAY_SIZE(names); i++) {
 -              val = *(__be32 *)(out + stats_offsets[i]);
 +      for (i = 0; i < port->q_cnts.num_counters; i++) {
 +              val = *(__be32 *)(out + port->q_cnts.offsets[i]);
                stats->value[i] = (u64)be32_to_cpu(val);
        }
 +
  free:
        kvfree(out);
 -      return ARRAY_SIZE(names);
 +      return port->q_cnts.num_counters;
  }
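/*
 * Editor's note: the counters read by mlx5_ib_get_hw_stats() above come back
 * from firmware as big-endian 32-bit fields at fixed byte offsets inside the
 * query_q_counter response; the loop simply walks the per-port offset table.
 * Stand-alone illustration with an invented three-counter layout:
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>  /* htonl()/ntohl() */

int main(void)
{
        unsigned char out[12];
        size_t offsets[] = { 0, 4, 8 };
        uint32_t be;

        /* Pretend the device wrote counter values 1, 2, 3 in network byte order. */
        for (int i = 0; i < 3; i++) {
                be = htonl(i + 1);
                memcpy(out + offsets[i], &be, sizeof(be));
        }

        for (int i = 0; i < 3; i++) {
                memcpy(&be, out + offsets[i], sizeof(be));
                printf("counter[%d] = %u\n", i, (unsigned)ntohl(be));
        }
        return 0;
}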
  
  static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        if (mlx5_use_mad_ifc(dev))
                get_ext_port_caps(dev);
  
 -      MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
 -
        if (!mlx5_lag_is_active(mdev))
                name = "mlx5_%d";
        else
        dev->ib_dev.phys_port_cnt     = dev->num_ports;
        dev->ib_dev.num_comp_vectors    =
                dev->mdev->priv.eq_table.num_comp_vectors;
-       dev->ib_dev.dma_device  = &mdev->pdev->dev;
+       dev->ib_dev.dev.parent          = &mdev->pdev->dev;
  
        dev->ib_dev.uverbs_abi_ver      = MLX5_IB_UVERBS_ABI_VERSION;
        dev->ib_dev.uverbs_cmd_mask     =
                        (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
        }
  
 -      if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
 -          MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
 +      if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
                dev->ib_dev.get_hw_stats        = mlx5_ib_get_hw_stats;
                dev->ib_dev.alloc_hw_stats      = mlx5_ib_alloc_hw_stats;
        }
        if (err)
                goto err_rsrc;
  
 -      err = mlx5_ib_alloc_q_counters(dev);
 +      if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
 +              err = mlx5_ib_alloc_q_counters(dev);
 +              if (err)
 +                      goto err_odp;
 +      }
 +
 +      dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
 +      if (!dev->mdev->priv.uar)
 +              goto err_q_cnt;
 +
 +      err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
 +      if (err)
 +              goto err_uar_page;
 +
 +      err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
        if (err)
 -              goto err_odp;
 +              goto err_bfreg;
  
        err = ib_register_device(&dev->ib_dev, NULL);
        if (err)
 -              goto err_q_cnt;
 +              goto err_fp_bfreg;
  
        err = create_umr_res(dev);
        if (err)
@@@ -3563,18 -3262,8 +3563,18 @@@ err_umrc
  err_dev:
        ib_unregister_device(&dev->ib_dev);
  
 +err_fp_bfreg:
 +      mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
 +
 +err_bfreg:
 +      mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 +
 +err_uar_page:
 +      mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
 +
  err_q_cnt:
 -      mlx5_ib_dealloc_q_counters(dev);
 +      if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
 +              mlx5_ib_dealloc_q_counters(dev);
  
  err_odp:
        mlx5_ib_odp_remove_one(dev);
@@@ -3604,11 -3293,7 +3604,11 @@@ static void mlx5_ib_remove(struct mlx5_
  
        mlx5_remove_netdev_notifier(dev);
        ib_unregister_device(&dev->ib_dev);
 -      mlx5_ib_dealloc_q_counters(dev);
 +      mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
 +      mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 +      mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
 +      if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
 +              mlx5_ib_dealloc_q_counters(dev);
        destroy_umrc_res(dev);
        mlx5_ib_odp_remove_one(dev);
        destroy_dev_resources(&dev->devr);
@@@ -3622,9 -3307,6 +3622,9 @@@ static struct mlx5_interface mlx5_ib_in
        .add            = mlx5_ib_add,
        .remove         = mlx5_ib_remove,
        .event          = mlx5_ib_event,
 +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 +      .pfault         = mlx5_ib_pfault,
 +#endif
        .protocol       = MLX5_INTERFACE_PROTOCOL_IB,
  };
  
@@@ -3632,16 -3314,28 +3632,16 @@@ static int __init mlx5_ib_init(void
  {
        int err;
  
 -      if (deprecated_prof_sel != 2)
 -              pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
 -
 -      err = mlx5_ib_odp_init();
 -      if (err)
 -              return err;
 +      mlx5_ib_odp_init();
  
        err = mlx5_register_interface(&mlx5_ib_interface);
 -      if (err)
 -              goto clean_odp;
 -
 -      return err;
  
 -clean_odp:
 -      mlx5_ib_odp_cleanup();
        return err;
  }
  
  static void __exit mlx5_ib_cleanup(void)
  {
        mlx5_unregister_interface(&mlx5_ib_interface);
 -      mlx5_ib_odp_cleanup();
  }
  
  module_init(mlx5_ib_init);
index 3c1f483d003f76d3330fcaa95091828dcb80898d,87ca81b85fd0f96285f08d3ef2e87bfaea3ff542..b8f9382a8b7dd72daf02b37804b912999dc500a1
@@@ -46,10 -46,14 +46,10 @@@ enum 
  };
  
  #define MLX5_UMR_ALIGN 2048
 -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 -static __be64 mlx5_ib_update_mtt_emergency_buffer[
 -              MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
 -      __aligned(MLX5_UMR_ALIGN);
 -static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
 -#endif
  
  static int clean_mr(struct mlx5_ib_mr *mr);
 +static int use_umr(struct mlx5_ib_dev *dev, int order);
 +static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
  
  static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  {
@@@ -130,7 -134,6 +130,7 @@@ static void reg_mr_callback(int status
                return;
        }
  
 +      mr->mmkey.type = MLX5_MKEY_MR;
        spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
        key = dev->mdev->priv.mkey_key++;
        spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
        if (err)
                pr_err("Error inserting to mkey tree. 0x%x\n", -err);
        write_unlock_irqrestore(&table->lock, flags);
 +
 +      if (!completion_done(&ent->compl))
 +              complete(&ent->compl);
  }
  
  static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
        struct mlx5_cache_ent *ent = &cache->ent[c];
        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        struct mlx5_ib_mr *mr;
 -      int npages = 1 << ent->order;
        void *mkc;
        u32 *in;
        int err = 0;
  
                MLX5_SET(mkc, mkc, free, 1);
                MLX5_SET(mkc, mkc, umr_en, 1);
 -              MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
 +              MLX5_SET(mkc, mkc, access_mode, ent->access_mode);
  
                MLX5_SET(mkc, mkc, qpn, 0xffffff);
 -              MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
 -              MLX5_SET(mkc, mkc, log_page_size, 12);
 +              MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
 +              MLX5_SET(mkc, mkc, log_page_size, ent->page);
  
                spin_lock_irq(&ent->lock);
                ent->pending++;
@@@ -450,42 -451,6 +450,42 @@@ static void cache_work_func(struct work
        __cache_work_func(ent);
  }
  
 +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
 +{
 +      struct mlx5_mr_cache *cache = &dev->cache;
 +      struct mlx5_cache_ent *ent;
 +      struct mlx5_ib_mr *mr;
 +      int err;
 +
 +      if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
 +              mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
 +              return NULL;
 +      }
 +
 +      ent = &cache->ent[entry];
 +      while (1) {
 +              spin_lock_irq(&ent->lock);
 +              if (list_empty(&ent->head)) {
 +                      spin_unlock_irq(&ent->lock);
 +
 +                      err = add_keys(dev, entry, 1);
 +                      if (err && err != -EAGAIN)
 +                              return ERR_PTR(err);
 +
 +                      wait_for_completion(&ent->compl);
 +              } else {
 +                      mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
 +                                            list);
 +                      list_del(&mr->list);
 +                      ent->cur--;
 +                      spin_unlock_irq(&ent->lock);
 +                      if (ent->cur < ent->limit)
 +                              queue_work(cache->wq, &ent->work);
 +                      return mr;
 +              }
 +      }
 +}
 +
  static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
  {
        struct mlx5_mr_cache *cache = &dev->cache;
        int i;
  
        c = order2idx(dev, order);
 -      if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
 +      if (c < 0 || c > MAX_UMR_CACHE_ENTRY) {
                mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
                return NULL;
        }
  
 -      for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
 +      for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) {
                ent = &cache->ent[i];
  
                mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
        return mr;
  }
  
 -static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 +void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  {
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
                mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
                return;
        }
 +
 +      if (unreg_umr(dev, mr))
 +              return;
 +
        ent = &cache->ent[c];
        spin_lock_irq(&ent->lock);
        list_add_tail(&mr->list, &ent->head);
@@@ -645,6 -606,7 +645,6 @@@ int mlx5_mr_cache_init(struct mlx5_ib_d
  {
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
 -      int limit;
        int err;
        int i;
  
  
        setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 -              INIT_LIST_HEAD(&cache->ent[i].head);
 -              spin_lock_init(&cache->ent[i].lock);
 -
                ent = &cache->ent[i];
                INIT_LIST_HEAD(&ent->head);
                spin_lock_init(&ent->lock);
                ent->order = i + 2;
                ent->dev = dev;
 +              ent->limit = 0;
  
 -              if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
 -                  (mlx5_core_is_pf(dev->mdev)))
 -                      limit = dev->mdev->profile->mr_cache[i].limit;
 -              else
 -                      limit = 0;
 -
 +              init_completion(&ent->compl);
                INIT_WORK(&ent->work, cache_work_func);
                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
 -              ent->limit = limit;
                queue_work(cache->wq, &ent->work);
 +
 +              if (i > MAX_UMR_CACHE_ENTRY) {
 +                      mlx5_odp_init_mr_cache_entry(ent);
 +                      continue;
 +              }
 +
 +              if (!use_umr(dev, ent->order))
 +                      continue;
 +
 +              ent->page = PAGE_SHIFT;
 +              ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
 +                         MLX5_IB_UMR_OCTOWORD;
 +              ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
 +              if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
 +                  mlx5_core_is_pf(dev->mdev))
 +                      ent->limit = dev->mdev->profile->mr_cache[i].limit;
 +              else
 +                      ent->limit = 0;
        }
  
        err = mlx5_mr_cache_debugfs_init(dev);
@@@ -780,7 -732,6 +780,7 @@@ struct ib_mr *mlx5_ib_get_dma_mr(struc
                goto err_in;
  
        kfree(in);
 +      mr->mmkey.type = MLX5_MKEY_MR;
        mr->ibmr.lkey = mr->mmkey.key;
        mr->ibmr.rkey = mr->mmkey.key;
        mr->umem = NULL;
@@@ -806,13 -757,94 +806,13 @@@ static int get_octo_len(u64 addr, u64 l
        return (npages + 1) / 2;
  }
  
 -static int use_umr(int order)
 +static int use_umr(struct mlx5_ib_dev *dev, int order)
  {
 +      if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
 +              return order <= MAX_UMR_CACHE_ENTRY + 2;
        return order <= MLX5_MAX_UMR_SHIFT;
  }
  
 -static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 -                        int npages, int page_shift, int *size,
 -                        __be64 **mr_pas, dma_addr_t *dma)
 -{
 -      __be64 *pas;
 -      struct device *ddev = dev->ib_dev.dev.parent;
 -
 -      /*
 -       * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
 -       * To avoid copying garbage after the pas array, we allocate
 -       * a little more.
 -       */
 -      *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
 -      *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
 -      if (!(*mr_pas))
 -              return -ENOMEM;
 -
 -      pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
 -      mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
 -      /* Clear padding after the actual pages. */
 -      memset(pas + npages, 0, *size - npages * sizeof(u64));
 -
 -      *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
 -      if (dma_mapping_error(ddev, *dma)) {
 -              kfree(*mr_pas);
 -              return -ENOMEM;
 -      }
 -
 -      return 0;
 -}
 -
 -static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
 -                              struct ib_sge *sg, u64 dma, int n, u32 key,
 -                              int page_shift)
 -{
 -      struct mlx5_ib_dev *dev = to_mdev(pd->device);
 -      struct mlx5_umr_wr *umrwr = umr_wr(wr);
 -
 -      sg->addr = dma;
 -      sg->length = ALIGN(sizeof(u64) * n, 64);
 -      sg->lkey = dev->umrc.pd->local_dma_lkey;
 -
 -      wr->next = NULL;
 -      wr->sg_list = sg;
 -      if (n)
 -              wr->num_sge = 1;
 -      else
 -              wr->num_sge = 0;
 -
 -      wr->opcode = MLX5_IB_WR_UMR;
 -
 -      umrwr->npages = n;
 -      umrwr->page_shift = page_shift;
 -      umrwr->mkey = key;
 -}
 -
 -static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
 -                           struct ib_sge *sg, u64 dma, int n, u32 key,
 -                           int page_shift, u64 virt_addr, u64 len,
 -                           int access_flags)
 -{
 -      struct mlx5_umr_wr *umrwr = umr_wr(wr);
 -
 -      prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);
 -
 -      wr->send_flags = 0;
 -
 -      umrwr->target.virt_addr = virt_addr;
 -      umrwr->length = len;
 -      umrwr->access_flags = access_flags;
 -      umrwr->pd = pd;
 -}
 -
 -static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
 -                             struct ib_send_wr *wr, u32 key)
 -{
 -      struct mlx5_umr_wr *umrwr = umr_wr(wr);
 -
 -      wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
 -      wr->opcode = MLX5_IB_WR_UMR;
 -      umrwr->mkey = key;
 -}
 -
  static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
                       int access_flags, struct ib_umem **umem,
                       int *npages, int *page_shift, int *ncont,
@@@ -859,39 -891,21 +859,39 @@@ static inline void mlx5_ib_init_umr_con
        init_completion(&context->done);
  }
  
 +static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
 +                                struct mlx5_umr_wr *umrwr)
 +{
 +      struct umr_common *umrc = &dev->umrc;
 +      struct ib_send_wr *bad;
 +      int err;
 +      struct mlx5_ib_umr_context umr_context;
 +
 +      mlx5_ib_init_umr_context(&umr_context);
 +      umrwr->wr.wr_cqe = &umr_context.cqe;
 +
 +      down(&umrc->sem);
 +      err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
 +      if (err) {
 +              mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
 +      } else {
 +              wait_for_completion(&umr_context.done);
 +              if (umr_context.status != IB_WC_SUCCESS) {
 +                      mlx5_ib_warn(dev, "reg umr failed (%u)\n",
 +                                   umr_context.status);
 +                      err = -EFAULT;
 +              }
 +      }
 +      up(&umrc->sem);
 +      return err;
 +}
 +
  static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
                                  u64 virt_addr, u64 len, int npages,
                                  int page_shift, int order, int access_flags)
  {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 -      struct device *ddev = dev->ib_dev.dev.parent;
 -      struct umr_common *umrc = &dev->umrc;
 -      struct mlx5_ib_umr_context umr_context;
 -      struct mlx5_umr_wr umrwr = {};
 -      struct ib_send_wr *bad;
        struct mlx5_ib_mr *mr;
 -      struct ib_sge sg;
 -      int size;
 -      __be64 *mr_pas;
 -      dma_addr_t dma;
        int err = 0;
        int i;
  
        if (!mr)
                return ERR_PTR(-EAGAIN);
  
 -      err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
 -                           &dma);
 -      if (err)
 -              goto free_mr;
 -
 -      mlx5_ib_init_umr_context(&umr_context);
 +      mr->ibmr.pd = pd;
 +      mr->umem = umem;
 +      mr->access_flags = access_flags;
 +      mr->desc_size = sizeof(struct mlx5_mtt);
 +      mr->mmkey.iova = virt_addr;
 +      mr->mmkey.size = len;
 +      mr->mmkey.pd = to_mpd(pd)->pdn;
  
 -      umrwr.wr.wr_cqe = &umr_context.cqe;
 -      prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
 -                       page_shift, virt_addr, len, access_flags);
 +      err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
 +                               MLX5_IB_UPD_XLT_ENABLE);
  
 -      down(&umrc->sem);
 -      err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
        if (err) {
 -              mlx5_ib_warn(dev, "post send failed, err %d\n", err);
 -              goto unmap_dma;
 -      } else {
 -              wait_for_completion(&umr_context.done);
 -              if (umr_context.status != IB_WC_SUCCESS) {
 -                      mlx5_ib_warn(dev, "reg umr failed\n");
 -                      err = -EFAULT;
 -              }
 +              mlx5_mr_cache_free(dev, mr);
 +              return ERR_PTR(err);
        }
  
 -      mr->mmkey.iova = virt_addr;
 -      mr->mmkey.size = len;
 -      mr->mmkey.pd = to_mpd(pd)->pdn;
 -
        mr->live = 1;
  
 -unmap_dma:
 -      up(&umrc->sem);
 -      dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 +      return mr;
 +}
 +
 +static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
 +                             void *xlt, int page_shift, size_t size,
 +                             int flags)
 +{
 +      struct mlx5_ib_dev *dev = mr->dev;
 +      struct ib_umem *umem = mr->umem;
 +
 +      if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
 +              mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
 +              return npages;
 +      }
  
 -      kfree(mr_pas);
 +      npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
  
 -free_mr:
 -      if (err) {
 -              free_cached_mr(dev, mr);
 -              return ERR_PTR(err);
 +      if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
 +              __mlx5_ib_populate_pas(dev, umem, page_shift,
 +                                     idx, npages, xlt,
 +                                     MLX5_IB_MTT_PRESENT);
 +              /* Clear padding after the pages
 +               * brought from the umem.
 +               */
 +              memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
 +                     size - npages * sizeof(struct mlx5_mtt));
        }
  
 -      return mr;
 +      return npages;
  }
  
 -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 -int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
 -                     int zap)
 +#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
 +                          MLX5_UMR_MTT_ALIGNMENT)
 +#define MLX5_SPARE_UMR_CHUNK 0x10000
 +
 +int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 +                     int page_shift, int flags)
  {
        struct mlx5_ib_dev *dev = mr->dev;
-       struct device *ddev = dev->ib_dev.dma_device;
+       struct device *ddev = dev->ib_dev.dev.parent;
 -      struct umr_common *umrc = &dev->umrc;
 -      struct mlx5_ib_umr_context umr_context;
 -      struct ib_umem *umem = mr->umem;
 +      struct mlx5_ib_ucontext *uctx = NULL;
        int size;
 -      __be64 *pas;
 +      void *xlt;
        dma_addr_t dma;
 -      struct ib_send_wr *bad;
        struct mlx5_umr_wr wr;
        struct ib_sge sg;
        int err = 0;
 -      const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
 -      const int page_index_mask = page_index_alignment - 1;
 +      int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
 +                             ? sizeof(struct mlx5_klm)
 +                             : sizeof(struct mlx5_mtt);
 +      const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
 +      const int page_mask = page_align - 1;
        size_t pages_mapped = 0;
        size_t pages_to_map = 0;
        size_t pages_iter = 0;
 -      int use_emergency_buf = 0;
 +      gfp_t gfp;
  
        /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
 -       * so we need to align the offset and length accordingly */
 -      if (start_page_index & page_index_mask) {
 -              npages += start_page_index & page_index_mask;
 -              start_page_index &= ~page_index_mask;
 +       * so we need to align the offset and length accordingly
 +       */
 +      if (idx & page_mask) {
 +              npages += idx & page_mask;
 +              idx &= ~page_mask;
        }
  
 -      pages_to_map = ALIGN(npages, page_index_alignment);
 +      gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
 +      gfp |= __GFP_ZERO | __GFP_NOWARN;
  
 -      if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
 -              return -EINVAL;
 +      pages_to_map = ALIGN(npages, page_align);
 +      size = desc_size * pages_to_map;
 +      size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
  
 -      size = sizeof(u64) * pages_to_map;
 -      size = min_t(int, PAGE_SIZE, size);
 -      /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
 -       * code, when we are called from an invalidation. The pas buffer must
 -       * be 2k-aligned for Connect-IB. */
 -      pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
 -      if (!pas) {
 -              mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
 -              pas = mlx5_ib_update_mtt_emergency_buffer;
 -              size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
 -              use_emergency_buf = 1;
 -              mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
 -              memset(pas, 0, size);
 +      xlt = (void *)__get_free_pages(gfp, get_order(size));
 +      if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
 +              mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d; falling back to spare UMR allocation of %d bytes\n",
 +                          size, get_order(size), MLX5_SPARE_UMR_CHUNK);
 +
 +              size = MLX5_SPARE_UMR_CHUNK;
 +              xlt = (void *)__get_free_pages(gfp, get_order(size));
 +      }
 +
 +      if (!xlt) {
 +              uctx = to_mucontext(mr->ibmr.uobject->context);
 +              mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
 +              size = PAGE_SIZE;
 +              xlt = (void *)uctx->upd_xlt_page;
 +              mutex_lock(&uctx->upd_xlt_page_mutex);
 +              memset(xlt, 0, size);
        }
 -      pages_iter = size / sizeof(u64);
 -      dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
 +      pages_iter = size / desc_size;
 +      dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
        if (dma_mapping_error(ddev, dma)) {
 -              mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
 +              mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
                err = -ENOMEM;
 -              goto free_pas;
 +              goto free_xlt;
        }
  
 +      sg.addr = dma;
 +      sg.lkey = dev->umrc.pd->local_dma_lkey;
 +
 +      memset(&wr, 0, sizeof(wr));
 +      wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
 +      if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
 +              wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
 +      wr.wr.sg_list = &sg;
 +      wr.wr.num_sge = 1;
 +      wr.wr.opcode = MLX5_IB_WR_UMR;
 +
 +      wr.pd = mr->ibmr.pd;
 +      wr.mkey = mr->mmkey.key;
 +      wr.length = mr->mmkey.size;
 +      wr.virt_addr = mr->mmkey.iova;
 +      wr.access_flags = mr->access_flags;
 +      wr.page_shift = page_shift;
 +
        for (pages_mapped = 0;
             pages_mapped < pages_to_map && !err;
 -           pages_mapped += pages_iter, start_page_index += pages_iter) {
 +           pages_mapped += pages_iter, idx += pages_iter) {
                dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
 -
 -              npages = min_t(size_t,
 -                             pages_iter,
 -                             ib_umem_num_pages(umem) - start_page_index);
 -
 -              if (!zap) {
 -                      __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
 -                                             start_page_index, npages, pas,
 -                                             MLX5_IB_MTT_PRESENT);
 -                      /* Clear padding after the pages brought from the
 -                       * umem. */
 -                      memset(pas + npages, 0, size - npages * sizeof(u64));
 -              }
 +              npages = populate_xlt(mr, idx, pages_iter, xlt,
 +                                    page_shift, size, flags);
  
                dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
  
 -              mlx5_ib_init_umr_context(&umr_context);
 -
 -              memset(&wr, 0, sizeof(wr));
 -              wr.wr.wr_cqe = &umr_context.cqe;
 -
 -              sg.addr = dma;
 -              sg.length = ALIGN(npages * sizeof(u64),
 -                              MLX5_UMR_MTT_ALIGNMENT);
 -              sg.lkey = dev->umrc.pd->local_dma_lkey;
 +              sg.length = ALIGN(npages * desc_size,
 +                                MLX5_UMR_MTT_ALIGNMENT);
 +
 +              if (pages_mapped + pages_iter >= pages_to_map) {
 +                      if (flags & MLX5_IB_UPD_XLT_ENABLE)
 +                              wr.wr.send_flags |=
 +                                      MLX5_IB_SEND_UMR_ENABLE_MR |
 +                                      MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
 +                                      MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
 +                      if (flags & MLX5_IB_UPD_XLT_PD ||
 +                          flags & MLX5_IB_UPD_XLT_ACCESS)
 +                              wr.wr.send_flags |=
 +                                      MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
 +                      if (flags & MLX5_IB_UPD_XLT_ADDR)
 +                              wr.wr.send_flags |=
 +                                      MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
 +              }
  
 -              wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
 -                              MLX5_IB_SEND_UMR_UPDATE_MTT;
 -              wr.wr.sg_list = &sg;
 -              wr.wr.num_sge = 1;
 -              wr.wr.opcode = MLX5_IB_WR_UMR;
 -              wr.npages = sg.length / sizeof(u64);
 -              wr.page_shift = PAGE_SHIFT;
 -              wr.mkey = mr->mmkey.key;
 -              wr.target.offset = start_page_index;
 +              wr.offset = idx * desc_size;
 +              wr.xlt_size = sg.length;
  
 -              down(&umrc->sem);
 -              err = ib_post_send(umrc->qp, &wr.wr, &bad);
 -              if (err) {
 -                      mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
 -              } else {
 -                      wait_for_completion(&umr_context.done);
 -                      if (umr_context.status != IB_WC_SUCCESS) {
 -                              mlx5_ib_err(dev, "UMR completion failed, code %d\n",
 -                                          umr_context.status);
 -                              err = -EFAULT;
 -                      }
 -              }
 -              up(&umrc->sem);
 +              err = mlx5_ib_post_send_wait(dev, &wr);
        }
        dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
  
 -free_pas:
 -      if (!use_emergency_buf)
 -              free_page((unsigned long)pas);
 +free_xlt:
 +      if (uctx)
 +              mutex_unlock(&uctx->upd_xlt_page_mutex);
        else
 -              mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
 +              free_pages((unsigned long)xlt, get_order(size));
  
        return err;
  }
 -#endif
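
The UMR call sites in this file (mlx5_ib_update_xlt() above, unreg_umr() and rereg_umr() below) all drop the same post-and-wait boilerplate in favour of a single mlx5_ib_post_send_wait() call. A helper with roughly the following body would cover what was deleted; this is a sketch reconstructed from the removed lines, not the actual implementation added elsewhere in the series.

/* Sketch only: reconstructed from the boilerplate deleted above. */
static int example_post_send_wait(struct mlx5_ib_dev *dev,
                                  struct mlx5_umr_wr *umrwr)
{
        struct umr_common *umrc = &dev->umrc;
        struct mlx5_ib_umr_context umr_context;
        struct ib_send_wr *bad;
        int err;

        mlx5_ib_init_umr_context(&umr_context);
        umrwr->wr.wr_cqe = &umr_context.cqe;

        down(&umrc->sem);
        err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
        if (err) {
                mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
        } else {
                wait_for_completion(&umr_context.done);
                if (umr_context.status != IB_WC_SUCCESS) {
                        mlx5_ib_warn(dev, "UMR completion failed, code %d\n",
                                     umr_context.status);
                        err = -EFAULT;
                }
        }
        up(&umrc->sem);

        return err;
}
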
  
  /*
   * If ibmr is NULL it will be allocated by reg_create.
@@@ -1115,9 -1122,8 +1115,9 @@@ static struct mlx5_ib_mr *reg_create(st
                goto err_1;
        }
        pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
 -      mlx5_ib_populate_pas(dev, umem, page_shift, pas,
 -                           pg_cap ? MLX5_IB_MTT_PRESENT : 0);
 +      if (!(access_flags & IB_ACCESS_ON_DEMAND))
 +              mlx5_ib_populate_pas(dev, umem, page_shift, pas,
 +                                   pg_cap ? MLX5_IB_MTT_PRESENT : 0);
  
        /* The pg_access bit allows setting the access flags
         * in the page list submitted with the command. */
                mlx5_ib_warn(dev, "create mkey failed\n");
                goto err_2;
        }
 +      mr->mmkey.type = MLX5_MKEY_MR;
 +      mr->desc_size = sizeof(struct mlx5_mtt);
        mr->umem = umem;
        mr->dev = dev;
        mr->live = 1;
@@@ -1194,33 -1198,20 +1194,33 @@@ struct ib_mr *mlx5_ib_reg_user_mr(struc
  
        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
                    start, virt_addr, length, access_flags);
 +
 +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 +      if (!start && length == U64_MAX) {
 +              if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
 +                  !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
 +                      return ERR_PTR(-EINVAL);
 +
 +              mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
 +              return &mr->ibmr;
 +      }
 +#endif
 +
        err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
                           &page_shift, &ncont, &order);
  
          if (err < 0)
                return ERR_PTR(err);
  
 -      if (use_umr(order)) {
 +      if (use_umr(dev, order)) {
                mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
                             order, access_flags);
                if (PTR_ERR(mr) == -EAGAIN) {
                        mlx5_ib_dbg(dev, "cache empty for order %d", order);
                        mr = NULL;
                }
 -      } else if (access_flags & IB_ACCESS_ON_DEMAND) {
 +      } else if (access_flags & IB_ACCESS_ON_DEMAND &&
 +                 !MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
                err = -EINVAL;
                pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
                goto error;
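
The mlx5_ib_reg_user_mr() hunk above treats start == 0 with length == U64_MAX as a request for an implicit ODP MR that lazily covers the whole address space, provided the device reports IB_ODP_SUPPORT_IMPLICIT. From user space such a registration would look roughly like the call below; the exact access flags an application needs are an assumption here.

/* Hedged user-space sketch: whole-address-space on-demand registration.
 * Requires a provider that advertises implicit ODP support. */
#include <stdint.h>
#include <infiniband/verbs.h>

struct ibv_mr *example_reg_implicit_odp(struct ibv_pd *pd)
{
        return ibv_reg_mr(pd, NULL, SIZE_MAX,
                          IBV_ACCESS_ON_DEMAND | IBV_ACCESS_LOCAL_WRITE);
}
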
@@@ -1257,39 -1248,106 +1257,39 @@@ error
  static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  {
        struct mlx5_core_dev *mdev = dev->mdev;
 -      struct umr_common *umrc = &dev->umrc;
 -      struct mlx5_ib_umr_context umr_context;
        struct mlx5_umr_wr umrwr = {};
 -      struct ib_send_wr *bad;
 -      int err;
  
        if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
                return 0;
  
 -      mlx5_ib_init_umr_context(&umr_context);
 +      umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
 +                            MLX5_IB_SEND_UMR_FAIL_IF_FREE;
 +      umrwr.wr.opcode = MLX5_IB_WR_UMR;
 +      umrwr.mkey = mr->mmkey.key;
  
 -      umrwr.wr.wr_cqe = &umr_context.cqe;
 -      prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);
 -
 -      down(&umrc->sem);
 -      err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
 -      if (err) {
 -              up(&umrc->sem);
 -              mlx5_ib_dbg(dev, "err %d\n", err);
 -              goto error;
 -      } else {
 -              wait_for_completion(&umr_context.done);
 -              up(&umrc->sem);
 -      }
 -      if (umr_context.status != IB_WC_SUCCESS) {
 -              mlx5_ib_warn(dev, "unreg umr failed\n");
 -              err = -EFAULT;
 -              goto error;
 -      }
 -      return 0;
 -
 -error:
 -      return err;
 +      return mlx5_ib_post_send_wait(dev, &umrwr);
  }
  
 -static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
 -                   u64 length, int npages, int page_shift, int order,
 +static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
                     int access_flags, int flags)
  {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 -      struct device *ddev = dev->ib_dev.dev.parent;
 -      struct mlx5_ib_umr_context umr_context;
 -      struct ib_send_wr *bad;
        struct mlx5_umr_wr umrwr = {};
 -      struct ib_sge sg;
 -      struct umr_common *umrc = &dev->umrc;
 -      dma_addr_t dma = 0;
 -      __be64 *mr_pas = NULL;
 -      int size;
        int err;
  
 -      mlx5_ib_init_umr_context(&umr_context);
 -
 -      umrwr.wr.wr_cqe = &umr_context.cqe;
        umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
  
 -      if (flags & IB_MR_REREG_TRANS) {
 -              err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
 -                                   &mr_pas, &dma);
 -              if (err)
 -                      return err;
 -
 -              umrwr.target.virt_addr = virt_addr;
 -              umrwr.length = length;
 -              umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
 -      }
 -
 -      prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
 -                          page_shift);
 +      umrwr.wr.opcode = MLX5_IB_WR_UMR;
 +      umrwr.mkey = mr->mmkey.key;
  
 -      if (flags & IB_MR_REREG_PD) {
 +      if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
                umrwr.pd = pd;
 -              umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
 -      }
 -
 -      if (flags & IB_MR_REREG_ACCESS) {
                umrwr.access_flags = access_flags;
 -              umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
 +              umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
        }
  
 -      /* post send request to UMR QP */
 -      down(&umrc->sem);
 -      err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
 +      err = mlx5_ib_post_send_wait(dev, &umrwr);
  
 -      if (err) {
 -              mlx5_ib_warn(dev, "post send failed, err %d\n", err);
 -      } else {
 -              wait_for_completion(&umr_context.done);
 -              if (umr_context.status != IB_WC_SUCCESS) {
 -                      mlx5_ib_warn(dev, "reg umr failed (%u)\n",
 -                                   umr_context.status);
 -                      err = -EFAULT;
 -              }
 -      }
 -
 -      up(&umrc->sem);
 -      if (flags & IB_MR_REREG_TRANS) {
 -              dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 -              kfree(mr_pas);
 -      }
        return err;
  }
  
@@@ -1306,7 -1364,6 +1306,7 @@@ int mlx5_ib_rereg_user_mr(struct ib_mr 
        u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
        u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
        int page_shift = 0;
 +      int upd_flags = 0;
        int npages = 0;
        int ncont = 0;
        int order = 0;
        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
                    start, virt_addr, length, access_flags);
  
 +      atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
 +
        if (flags != IB_MR_REREG_PD) {
                /*
                 * Replace umem. This needs to be done whether or not UMR is
                err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
                                  &npages, &page_shift, &ncont, &order);
                if (err < 0) {
 -                      mr->umem = NULL;
 +                      clean_mr(mr);
                        return err;
                }
        }
                /*
                 * Send a UMR WQE
                 */
 -              err = rereg_umr(pd, mr, addr, len, npages, page_shift,
 -                              order, access_flags, flags);
 +              mr->ibmr.pd = pd;
 +              mr->access_flags = access_flags;
 +              mr->mmkey.iova = addr;
 +              mr->mmkey.size = len;
 +              mr->mmkey.pd = to_mpd(pd)->pdn;
 +
 +              if (flags & IB_MR_REREG_TRANS) {
 +                      upd_flags = MLX5_IB_UPD_XLT_ADDR;
 +                      if (flags & IB_MR_REREG_PD)
 +                              upd_flags |= MLX5_IB_UPD_XLT_PD;
 +                      if (flags & IB_MR_REREG_ACCESS)
 +                              upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
 +                      err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
 +                                               upd_flags);
 +              } else {
 +                      err = rereg_umr(pd, mr, access_flags, flags);
 +              }
 +
                if (err) {
                        mlx5_ib_warn(dev, "Failed to rereg UMR\n");
 +                      ib_umem_release(mr->umem);
 +                      clean_mr(mr);
                        return err;
                }
        }
  
 -      if (flags & IB_MR_REREG_PD) {
 -              ib_mr->pd = pd;
 -              mr->mmkey.pd = to_mpd(pd)->pdn;
 -      }
 +      set_mr_fileds(dev, mr, npages, len, access_flags);
  
 -      if (flags & IB_MR_REREG_ACCESS)
 -              mr->access_flags = access_flags;
 -
 -      if (flags & IB_MR_REREG_TRANS) {
 -              atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
 -              set_mr_fileds(dev, mr, npages, len, access_flags);
 -              mr->mmkey.iova = addr;
 -              mr->mmkey.size = len;
 -      }
  #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        update_odp_mr(mr);
  #endif
 -
        return 0;
  }
  
@@@ -1411,9 -1461,9 +1411,9 @@@ mlx5_alloc_priv_descs(struct ib_device 
  
        mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
  
-       mr->desc_map = dma_map_single(device->dma_device, mr->descs,
+       mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
                                      size, DMA_TO_DEVICE);
-       if (dma_mapping_error(device->dma_device, mr->desc_map)) {
+       if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
                ret = -ENOMEM;
                goto err;
        }
@@@ -1432,7 -1482,7 +1432,7 @@@ mlx5_free_priv_descs(struct mlx5_ib_mr 
                struct ib_device *device = mr->ibmr.device;
                int size = mr->max_descs * mr->desc_size;
  
-               dma_unmap_single(device->dma_device, mr->desc_map,
+               dma_unmap_single(device->dev.parent, mr->desc_map,
                                 size, DMA_TO_DEVICE);
                kfree(mr->descs_alloc);
                mr->descs = NULL;
@@@ -1468,7 -1518,12 +1468,7 @@@ static int clean_mr(struct mlx5_ib_mr *
                        return err;
                }
        } else {
 -              err = unreg_umr(dev, mr);
 -              if (err) {
 -                      mlx5_ib_warn(dev, "failed unregister\n");
 -                      return err;
 -              }
 -              free_cached_mr(dev, mr);
 +              mlx5_mr_cache_free(dev, mr);
        }
  
        if (!umred)
@@@ -1491,11 -1546,8 +1491,11 @@@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr
                /* Wait for all running page-fault handlers to finish. */
                synchronize_srcu(&dev->mr_srcu);
                /* Destroy all page mappings */
 -              mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
 -                                       ib_umem_end(umem));
 +              if (umem->odp_data->page_list)
 +                      mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
 +                                               ib_umem_end(umem));
 +              else
 +                      mlx5_ib_free_implicit_mr(mr);
                /*
                 * We kill the umem before the MR for ODP,
                 * so that there will not be any invalidations in
@@@ -1551,11 -1603,11 +1551,11 @@@ struct ib_mr *mlx5_ib_alloc_mr(struct i
                mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
                MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
                err = mlx5_alloc_priv_descs(pd->device, mr,
 -                                          ndescs, sizeof(u64));
 +                                          ndescs, sizeof(struct mlx5_mtt));
                if (err)
                        goto err_free_in;
  
 -              mr->desc_size = sizeof(u64);
 +              mr->desc_size = sizeof(struct mlx5_mtt);
                mr->max_descs = ndescs;
        } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
                mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
        if (err)
                goto err_destroy_psv;
  
 +      mr->mmkey.type = MLX5_MKEY_MR;
        mr->ibmr.lkey = mr->mmkey.key;
        mr->ibmr.rkey = mr->mmkey.key;
        mr->umem = NULL;
@@@ -1685,7 -1736,6 +1685,7 @@@ struct ib_mw *mlx5_ib_alloc_mw(struct i
        if (err)
                goto free;
  
 +      mw->mmkey.type = MLX5_MKEY_MW;
        mw->ibmw.rkey = mw->mmkey.key;
  
        resp.response_length = min(offsetof(typeof(resp), response_length) +
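
The MLX5_MKEY_MR / MLX5_MKEY_MW assignments added in this file tag each mkey with its owner type at creation time. Assuming mmkey is the mlx5_core_mkey embedded in both the MR and MW structures (the field name comes from the hunks above; the container types and helper name are assumptions), a lookup can then recover the owning object safely:

/* Sketch: container types and helper name are illustrative only. */
static struct mlx5_ib_mr *example_mr_from_mmkey(struct mlx5_core_mkey *mmkey)
{
        if (mmkey->type != MLX5_MKEY_MR)
                return NULL;    /* e.g. a memory window tagged MLX5_MKEY_MW */
        return container_of(mmkey, struct mlx5_ib_mr, mmkey);
}
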
index ce163184e7422450044f271348b1fba67871d001,8ebfa959c4253029c825062721f5a71976eb46aa..22d0e6ee5af6aaed90c754ddb142ccee9d188867
@@@ -146,7 -146,7 +146,7 @@@ static int mthca_query_port(struct ib_d
        if (!in_mad || !out_mad)
                goto out;
  
 -      memset(props, 0, sizeof *props);
 +      /* props being zeroed by the caller, avoid zeroing it here */
  
        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
@@@ -212,7 -212,7 +212,7 @@@ static int mthca_modify_port(struct ib_
        if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
                return -ERESTARTSYS;
  
 -      err = mthca_query_port(ibdev, port, &attr);
 +      err = ib_query_port(ibdev, port, &attr);
        if (err)
                goto out;
  
@@@ -1166,14 -1166,13 +1166,14 @@@ static int mthca_port_immutable(struct 
        struct ib_port_attr attr;
        int err;
  
 -      err = mthca_query_port(ibdev, port_num, &attr);
 +      immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
 +
 +      err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
  
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 -      immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
  
        return 0;
@@@ -1224,7 -1223,7 +1224,7 @@@ int mthca_register_device(struct mthca_
        dev->ib_dev.node_type            = RDMA_NODE_IB_CA;
        dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
        dev->ib_dev.num_comp_vectors     = 1;
-       dev->ib_dev.dma_device           = &dev->pdev->dev;
+       dev->ib_dev.dev.parent           = &dev->pdev->dev;
        dev->ib_dev.query_device         = mthca_query_device;
        dev->ib_dev.query_port           = mthca_query_port;
        dev->ib_dev.modify_device        = mthca_modify_device;
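
mthca is one of many drivers in this diff whose get_port_immutable handler now fills core_cap_flags before calling the generic ib_query_port() instead of its own query_port routine; the ordering appears to matter because ib_query_port() consults the immutable flags (e.g. via rdma_protocol_ib()) while the structure is still being built. The generic shape of the conversion, with the protocol flag as the only driver-specific part, is:

/* Sketch of the pattern; RDMA_CORE_PORT_IBA_IB stands in for whatever
 * protocol flags a particular driver sets. */
static int example_port_immutable(struct ib_device *ibdev, u8 port_num,
                                  struct ib_port_immutable *immutable)
{
        struct ib_port_attr attr;
        int err;

        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;

        err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;

        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;

        return 0;
}
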
index d3eae2f3e9f504957305e4bda59f837327bc69f7,9b0ac36747020ec65c14e96e3fbb6302fbac8f2d..ccf0a4cffe9c1b359deceed34b939181b161b110
@@@ -475,10 -475,20 +475,10 @@@ static int nes_query_port(struct ib_dev
        struct nes_vnic *nesvnic = to_nesvnic(ibdev);
        struct net_device *netdev = nesvnic->netdev;
  
 -      memset(props, 0, sizeof(*props));
 +      /* props being zeroed by the caller, avoid zeroing it here */
  
        props->max_mtu = IB_MTU_4096;
 -
 -      if (netdev->mtu  >= 4096)
 -              props->active_mtu = IB_MTU_4096;
 -      else if (netdev->mtu  >= 2048)
 -              props->active_mtu = IB_MTU_2048;
 -      else if (netdev->mtu  >= 1024)
 -              props->active_mtu = IB_MTU_1024;
 -      else if (netdev->mtu  >= 512)
 -              props->active_mtu = IB_MTU_512;
 -      else
 -              props->active_mtu = IB_MTU_256;
 +      props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
  
        props->lid = 1;
        props->lmc = 0;
@@@ -3660,14 -3670,13 +3660,14 @@@ static int nes_port_immutable(struct ib
        struct ib_port_attr attr;
        int err;
  
 +      immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
 +
        err = nes_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
  
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 -      immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
  
        return 0;
  }
@@@ -3731,7 -3740,6 +3731,6 @@@ struct nes_ib_device *nes_init_ofa_devi
  
        nesibdev->ibdev.phys_port_cnt = 1;
        nesibdev->ibdev.num_comp_vectors = 1;
-       nesibdev->ibdev.dma_device = &nesdev->pcidev->dev;
        nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev;
        nesibdev->ibdev.query_device = nes_query_device;
        nesibdev->ibdev.query_port = nes_query_port;
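
The nes_query_port() hunk above replaces an open-coded MTU ladder with ib_mtu_int_to_enum(). Based on the ladder it replaces, the helper's behaviour is equivalent to the sketch below; the real definition lives in include/rdma/ib_verbs.h.

/* Sketch of the mapping implied by the removed if/else ladder. */
static inline enum ib_mtu example_mtu_int_to_enum(int mtu)
{
        if (mtu >= 4096)
                return IB_MTU_4096;
        else if (mtu >= 2048)
                return IB_MTU_2048;
        else if (mtu >= 1024)
                return IB_MTU_1024;
        else if (mtu >= 512)
                return IB_MTU_512;
        else
                return IB_MTU_256;
}
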
index 3e43bdc81e7a5b49574c5b5460e0f7a38636e199,38fb8bf35ef3d928ce197b93402715f3c676ca06..57c9a2ad0260bfd26366cacdfcfedca48308f80a
@@@ -93,16 -93,15 +93,16 @@@ static int ocrdma_port_immutable(struc
        int err;
  
        dev = get_ocrdma_dev(ibdev);
 -      err = ocrdma_query_port(ibdev, port_num, &attr);
 +      immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
 +      if (ocrdma_is_udp_encap_supported(dev))
 +              immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
 +
 +      err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
  
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 -      immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
 -      if (ocrdma_is_udp_encap_supported(dev))
 -              immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
  
        return 0;
@@@ -199,7 -198,7 +199,7 @@@ static int ocrdma_register_device(struc
        dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
        dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
        dev->ibdev.mmap = ocrdma_mmap;
-       dev->ibdev.dma_device = &dev->nic_info.pdev->dev;
+       dev->ibdev.dev.parent = &dev->nic_info.pdev->dev;
  
        dev->ibdev.process_mad = ocrdma_process_mad;
        dev->ibdev.get_port_immutable = ocrdma_port_immutable;
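
The one-line dma_device to dev.parent switch seen here for ocrdma recurs in nearly every driver in this series: the ib_device no longer carries a separate DMA device pointer, and the hardware's struct device instead becomes the parent of the ib_device's embedded device. A minimal sketch of what a driver now does at registration time:

/* Sketch: the PCI device becomes the parent; DMA mapping then targets
 * that parent (or, for software devices, dma_virt_ops). */
static void example_init_ibdev(struct ib_device *ibdev, struct pci_dev *pdev)
{
        ibdev->dev.parent = &pdev->dev;  /* was: ibdev->dma_device = &pdev->dev; */
}
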
index 3ac8aa5ef37de2c5242125077eef78035d565901,908c0b00ffdbfc6f1e3b0aa7ae59da3fa8852b78..b9b47e5cc8b3bde5a053107c0ba6a4997754c1ee
@@@ -170,7 -170,7 +170,7 @@@ static int qedr_register_device(struct 
        dev->ibdev.get_port_immutable = qedr_port_immutable;
        dev->ibdev.get_netdev = qedr_get_netdev;
  
-       dev->ibdev.dma_device = &dev->pdev->dev;
+       dev->ibdev.dev.parent = &dev->pdev->dev;
  
        dev->ibdev.get_link_layer = qedr_link_layer;
        dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
@@@ -576,7 -576,8 +576,7 @@@ static int qedr_set_device_attr(struct 
        return 0;
  }
  
 -void qedr_unaffiliated_event(void *context,
 -                           u8 event_code)
 +void qedr_unaffiliated_event(void *context, u8 event_code)
  {
        pr_err("unaffiliated event not implemented yet\n");
  }
@@@ -791,9 -792,6 +791,9 @@@ static struct qedr_dev *qedr_add(struc
                if (device_create_file(&dev->ibdev.dev, qedr_attributes[i]))
                        goto sysfs_err;
  
 +      if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state))
 +              qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE);
 +
        DP_DEBUG(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n");
        return dev;
  
@@@ -826,10 -824,11 +826,10 @@@ static void qedr_remove(struct qedr_de
        ib_dealloc_device(&dev->ibdev);
  }
  
 -static int qedr_close(struct qedr_dev *dev)
 +static void qedr_close(struct qedr_dev *dev)
  {
 -      qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
 -
 -      return 0;
 +      if (test_and_clear_bit(QEDR_ENET_STATE_BIT, &dev->enet_state))
 +              qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ERR);
  }
  
  static void qedr_shutdown(struct qedr_dev *dev)
        qedr_remove(dev);
  }
  
 +static void qedr_open(struct qedr_dev *dev)
 +{
 +      if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state))
 +              qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE);
 +}
 +
  static void qedr_mac_address_change(struct qedr_dev *dev)
  {
        union ib_gid *sgid = &dev->sgid_tbl[0];
  
        ether_addr_copy(dev->gsi_ll2_mac_address, dev->ndev->dev_addr);
  
 -      qedr_ib_dispatch_event(dev, 1, IB_EVENT_GID_CHANGE);
 +      qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_GID_CHANGE);
  
        if (rc)
                DP_ERR(dev, "Error updating mac filter\n");
@@@ -884,7 -877,7 +884,7 @@@ static void qedr_notify(struct qedr_de
  {
        switch (event) {
        case QEDE_UP:
 -              qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
 +              qedr_open(dev);
                break;
        case QEDE_DOWN:
                qedr_close(dev);
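
qedr_open()/qedr_close() now gate the IB port events on QEDR_ENET_STATE_BIT, so repeated QEDE_UP or QEDE_DOWN notifications produce at most one PORT_ACTIVE/PORT_ERR event per actual transition. The pattern in isolation, with example_* names that are illustrative and not part of the driver:

/* Sketch of the edge-triggered notification pattern used above. */
#define EXAMPLE_LINK_UP_BIT 0

static void example_link_up(unsigned long *state)
{
        if (!test_and_set_bit(EXAMPLE_LINK_UP_BIT, state))
                pr_info("link up: dispatch PORT_ACTIVE exactly once\n");
}

static void example_link_down(unsigned long *state)
{
        if (test_and_clear_bit(EXAMPLE_LINK_UP_BIT, state))
                pr_info("link down: dispatch PORT_ERR exactly once\n");
}
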
index 6b56f1c01a0789f9335691f57535947df4afa2a9,24ed82f3592fcdbfbcd12515543a83f4154fbea4..83f8b5f243819b04c40213ed7f6e5ae7b15648b8
@@@ -129,16 -129,78 +129,16 @@@ void qib_copy_sge(struct rvt_sge_state 
        struct rvt_sge *sge = &ss->sge;
  
        while (length) {
 -              u32 len = sge->length;
 +              u32 len = rvt_get_sge_length(sge, length);
  
 -              if (len > length)
 -                      len = length;
 -              if (len > sge->sge_length)
 -                      len = sge->sge_length;
 -              BUG_ON(len == 0);
 +              WARN_ON_ONCE(len == 0);
                memcpy(sge->vaddr, data, len);
 -              sge->vaddr += len;
 -              sge->length -= len;
 -              sge->sge_length -= len;
 -              if (sge->sge_length == 0) {
 -                      if (release)
 -                              rvt_put_mr(sge->mr);
 -                      if (--ss->num_sge)
 -                              *sge = *ss->sg_list++;
 -              } else if (sge->length == 0 && sge->mr->lkey) {
 -                      if (++sge->n >= RVT_SEGSZ) {
 -                              if (++sge->m >= sge->mr->mapsz)
 -                                      break;
 -                              sge->n = 0;
 -                      }
 -                      sge->vaddr =
 -                              sge->mr->map[sge->m]->segs[sge->n].vaddr;
 -                      sge->length =
 -                              sge->mr->map[sge->m]->segs[sge->n].length;
 -              }
 +              rvt_update_sge(ss, len, release);
                data += len;
                length -= len;
        }
  }
  
 -/**
 - * qib_skip_sge - skip over SGE memory - XXX almost dup of prev func
 - * @ss: the SGE state
 - * @length: the number of bytes to skip
 - */
 -void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
 -{
 -      struct rvt_sge *sge = &ss->sge;
 -
 -      while (length) {
 -              u32 len = sge->length;
 -
 -              if (len > length)
 -                      len = length;
 -              if (len > sge->sge_length)
 -                      len = sge->sge_length;
 -              BUG_ON(len == 0);
 -              sge->vaddr += len;
 -              sge->length -= len;
 -              sge->sge_length -= len;
 -              if (sge->sge_length == 0) {
 -                      if (release)
 -                              rvt_put_mr(sge->mr);
 -                      if (--ss->num_sge)
 -                              *sge = *ss->sg_list++;
 -              } else if (sge->length == 0 && sge->mr->lkey) {
 -                      if (++sge->n >= RVT_SEGSZ) {
 -                              if (++sge->m >= sge->mr->mapsz)
 -                                      break;
 -                              sge->n = 0;
 -                      }
 -                      sge->vaddr =
 -                              sge->mr->map[sge->m]->segs[sge->n].vaddr;
 -                      sge->length =
 -                              sge->mr->map[sge->m]->segs[sge->n].length;
 -              }
 -              length -= len;
 -      }
 -}
 -
  /*
   * Count the number of DMA descriptors needed to send length bytes of data.
   * Don't modify the qib_sge_state to get the count.
@@@ -406,6 -468,27 +406,6 @@@ static void mem_timer(unsigned long dat
        }
  }
  
 -static void update_sge(struct rvt_sge_state *ss, u32 length)
 -{
 -      struct rvt_sge *sge = &ss->sge;
 -
 -      sge->vaddr += length;
 -      sge->length -= length;
 -      sge->sge_length -= length;
 -      if (sge->sge_length == 0) {
 -              if (--ss->num_sge)
 -                      *sge = *ss->sg_list++;
 -      } else if (sge->length == 0 && sge->mr->lkey) {
 -              if (++sge->n >= RVT_SEGSZ) {
 -                      if (++sge->m >= sge->mr->mapsz)
 -                              return;
 -                      sge->n = 0;
 -              }
 -              sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
 -              sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
 -      }
 -}
 -
  #ifdef __LITTLE_ENDIAN
  static inline u32 get_upper_bits(u32 data, u32 shift)
  {
@@@ -563,11 -646,11 +563,11 @@@ static void copy_io(u32 __iomem *piobuf
                                data = clear_upper_bytes(v, extra, 0);
                        }
                }
 -              update_sge(ss, len);
 +              rvt_update_sge(ss, len, false);
                length -= len;
        }
        /* Update address before sending packet. */
 -      update_sge(ss, length);
 +      rvt_update_sge(ss, length, false);
        if (flush_wc) {
                /* must flush early everything before trigger word */
                qib_flush_wc();
@@@ -986,7 -1069,7 +986,7 @@@ static int qib_verbs_send_pio(struct rv
                u32 *addr = (u32 *) ss->sge.vaddr;
  
                /* Update address before sending packet. */
 -              update_sge(ss, len);
 +              rvt_update_sge(ss, len, false);
                if (flush_wc) {
                        qib_pio_copy(piobuf, addr, dwords - 1);
                        /* must flush early everything before trigger word */
@@@ -1220,7 -1303,6 +1220,7 @@@ static int qib_query_port(struct rvt_de
        enum ib_mtu mtu;
        u16 lid = ppd->lid;
  
 +      /* props being zeroed by the caller, avoid zeroing it here */
        props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
        props->lmc = ppd->lmc;
        props->state = dd->f_iblink_state(ppd->lastibcstat);
@@@ -1550,7 -1632,7 +1550,7 @@@ int qib_register_ib_device(struct qib_d
        ibdev->owner = THIS_MODULE;
        ibdev->node_guid = ppd->guid;
        ibdev->phys_port_cnt = dd->num_pports;
-       ibdev->dma_device = &dd->pcidev->dev;
+       ibdev->dev.parent = &dd->pcidev->dev;
        ibdev->modify_device = qib_modify_device;
        ibdev->process_mad = qib_process_mad;
  
        dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue;
        dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
        dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp;
 +      dd->verbs_dev.rdi.driver_f.notify_restart_rc = qib_restart_rc;
        dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
        dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
        dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
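
qib_copy_sge() above collapses its open-coded SGE walking into rvt_get_sge_length() and rvt_update_sge() from rdmavt. Judging by the code it replaces, the length helper clamps the copy to both the remaining request and the current SGE, roughly as follows.

/* Sketch: behaviour implied by the min-of-three logic removed above. */
static inline u32 example_get_sge_length(struct rvt_sge *sge, u32 length)
{
        u32 len = sge->length;

        if (len > length)
                len = length;
        if (len > sge->sge_length)
                len = sge->sge_length;

        return len;
}
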
index 4f5a45db08e1889e7f4b072b7a92dfa5a5bacb6a,d44715cda1084ece5304befadd5f27f5f6d8d478..c0c1e8b027b1a6de79e07910ba177c775438fa3c
@@@ -321,9 -321,7 +321,9 @@@ static int usnic_port_immutable(struct 
        struct ib_port_attr attr;
        int err;
  
 -      err = usnic_ib_query_port(ibdev, port_num, &attr);
 +      immutable->core_cap_flags = RDMA_CORE_PORT_USNIC;
 +
 +      err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
  
@@@ -382,7 -380,7 +382,7 @@@ static void *usnic_ib_device_add(struc
        us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP;
        us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT;
        us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
-       us_ibdev->ib_dev.dma_device = &dev->dev;
+       us_ibdev->ib_dev.dev.parent = &dev->dev;
        us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
        strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
  
index e03d2f6c1f90ed4f7f9782027d73ba9c44ab4f3e,214793d514093476c07f2b77b2d6656e2b5045fc..100bea5c42ffb74375552131ebb1fbd5cbdc3659
@@@ -132,14 -132,13 +132,14 @@@ static int pvrdma_port_immutable(struc
        struct ib_port_attr attr;
        int err;
  
 -      err = pvrdma_query_port(ibdev, port_num, &attr);
 +      immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
 +
 +      err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
  
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 -      immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
        return 0;
  }
@@@ -173,7 -172,7 +173,7 @@@ static int pvrdma_register_device(struc
        dev->flags = 0;
        dev->ib_dev.owner = THIS_MODULE;
        dev->ib_dev.num_comp_vectors = 1;
-       dev->ib_dev.dma_device = &dev->pdev->dev;
+       dev->ib_dev.dev.parent = &dev->pdev->dev;
        dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
        dev->ib_dev.uverbs_cmd_mask =
                (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
@@@ -283,7 -282,7 +283,7 @@@ static irqreturn_t pvrdma_intr0_handler
  
        dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");
  
 -      if (dev->intr.type != PVRDMA_INTR_TYPE_MSIX) {
 +      if (!dev->pdev->msix_enabled) {
                /* Legacy intr */
                icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
                if (icr == 0)
@@@ -490,13 -489,31 +490,13 @@@ static irqreturn_t pvrdma_intrx_handler
        return IRQ_HANDLED;
  }
  
 -static void pvrdma_disable_msi_all(struct pvrdma_dev *dev)
 -{
 -      if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX)
 -              pci_disable_msix(dev->pdev);
 -      else if (dev->intr.type == PVRDMA_INTR_TYPE_MSI)
 -              pci_disable_msi(dev->pdev);
 -}
 -
  static void pvrdma_free_irq(struct pvrdma_dev *dev)
  {
        int i;
  
        dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
 -
 -      if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) {
 -              for (i = 0; i < dev->intr.size; i++) {
 -                      if (dev->intr.enabled[i]) {
 -                              free_irq(dev->intr.msix_entry[i].vector, dev);
 -                              dev->intr.enabled[i] = 0;
 -                      }
 -              }
 -      } else if (dev->intr.type == PVRDMA_INTR_TYPE_INTX ||
 -                 dev->intr.type == PVRDMA_INTR_TYPE_MSI) {
 -              free_irq(dev->pdev->irq, dev);
 -      }
 +      for (i = 0; i < dev->nr_vectors; i++)
 +              free_irq(pci_irq_vector(dev->pdev, i), dev);
  }
  
  static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
@@@ -511,48 -528,126 +511,48 @@@ static void pvrdma_disable_intrs(struc
        pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
  }
  
 -static int pvrdma_enable_msix(struct pci_dev *pdev, struct pvrdma_dev *dev)
 -{
 -      int i;
 -      int ret;
 -
 -      for (i = 0; i < PVRDMA_MAX_INTERRUPTS; i++) {
 -              dev->intr.msix_entry[i].entry = i;
 -              dev->intr.msix_entry[i].vector = i;
 -
 -              switch (i) {
 -              case 0:
 -                      /* CMD ring handler */
 -                      dev->intr.handler[i] = pvrdma_intr0_handler;
 -                      break;
 -              case 1:
 -                      /* Async event ring handler */
 -                      dev->intr.handler[i] = pvrdma_intr1_handler;
 -                      break;
 -              default:
 -                      /* Completion queue handler */
 -                      dev->intr.handler[i] = pvrdma_intrx_handler;
 -                      break;
 -              }
 -      }
 -
 -      ret = pci_enable_msix(pdev, dev->intr.msix_entry,
 -                            PVRDMA_MAX_INTERRUPTS);
 -      if (!ret) {
 -              dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
 -              dev->intr.size = PVRDMA_MAX_INTERRUPTS;
 -      } else if (ret > 0) {
 -              ret = pci_enable_msix(pdev, dev->intr.msix_entry, ret);
 -              if (!ret) {
 -                      dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
 -                      dev->intr.size = ret;
 -              } else {
 -                      dev->intr.size = 0;
 -              }
 -      }
 -
 -      dev_dbg(&pdev->dev, "using interrupt type %d, size %d\n",
 -              dev->intr.type, dev->intr.size);
 -
 -      return ret;
 -}
 -
  static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
  {
 -      int ret = 0;
 -      int i;
 +      struct pci_dev *pdev = dev->pdev;
 +      int ret = 0, i;
  
 -      if (pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX) &&
 -          pvrdma_enable_msix(dev->pdev, dev)) {
 -              /* Try MSI */
 -              ret = pci_enable_msi(dev->pdev);
 -              if (!ret) {
 -                      dev->intr.type = PVRDMA_INTR_TYPE_MSI;
 -              } else {
 -                      /* Legacy INTR */
 -                      dev->intr.type = PVRDMA_INTR_TYPE_INTX;
 -              }
 +      ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS,
 +                      PCI_IRQ_MSIX);
 +      if (ret < 0) {
 +              ret = pci_alloc_irq_vectors(pdev, 1, 1,
 +                              PCI_IRQ_MSI | PCI_IRQ_LEGACY);
 +              if (ret < 0)
 +                      return ret;
        }
 +      dev->nr_vectors = ret;
  
 -      /* Request First IRQ */
 -      switch (dev->intr.type) {
 -      case PVRDMA_INTR_TYPE_INTX:
 -      case PVRDMA_INTR_TYPE_MSI:
 -              ret = request_irq(dev->pdev->irq, pvrdma_intr0_handler,
 -                                IRQF_SHARED, DRV_NAME, dev);
 -              if (ret) {
 -                      dev_err(&dev->pdev->dev,
 -                              "failed to request interrupt\n");
 -                      goto disable_msi;
 -              }
 -              break;
 -      case PVRDMA_INTR_TYPE_MSIX:
 -              ret = request_irq(dev->intr.msix_entry[0].vector,
 -                                pvrdma_intr0_handler, 0, DRV_NAME, dev);
 -              if (ret) {
 -                      dev_err(&dev->pdev->dev,
 -                              "failed to request interrupt 0\n");
 -                      goto disable_msi;
 -              }
 -              dev->intr.enabled[0] = 1;
 -              break;
 -      default:
 -              /* Not reached */
 -              break;
 +      ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler,
 +                      pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev);
 +      if (ret) {
 +              dev_err(&dev->pdev->dev,
 +                      "failed to request interrupt 0\n");
 +              goto out_free_vectors;
        }
  
 -      /* For MSIX: request intr for each vector */
 -      if (dev->intr.size > 1) {
 -              ret = request_irq(dev->intr.msix_entry[1].vector,
 -                                pvrdma_intr1_handler, 0, DRV_NAME, dev);
 +      for (i = 1; i < dev->nr_vectors; i++) {
 +              ret = request_irq(pci_irq_vector(dev->pdev, i),
 +                              i == 1 ? pvrdma_intr1_handler :
 +                                       pvrdma_intrx_handler,
 +                              0, DRV_NAME, dev);
                if (ret) {
                        dev_err(&dev->pdev->dev,
 -                              "failed to request interrupt 1\n");
 -                      goto free_irq;
 -              }
 -              dev->intr.enabled[1] = 1;
 -
 -              for (i = 2; i < dev->intr.size; i++) {
 -                      ret = request_irq(dev->intr.msix_entry[i].vector,
 -                                        pvrdma_intrx_handler, 0,
 -                                        DRV_NAME, dev);
 -                      if (ret) {
 -                              dev_err(&dev->pdev->dev,
 -                                      "failed to request interrupt %d\n", i);
 -                              goto free_irq;
 -                      }
 -                      dev->intr.enabled[i] = 1;
 +                              "failed to request interrupt %d\n", i);
 +                      goto free_irqs;
                }
        }
  
        return 0;
  
 -free_irq:
 -      pvrdma_free_irq(dev);
 -disable_msi:
 -      pvrdma_disable_msi_all(dev);
 +free_irqs:
 +      while (--i >= 0)
 +              free_irq(pci_irq_vector(dev->pdev, i), dev);
 +out_free_vectors:
 +      pci_free_irq_vectors(pdev);
        return ret;
  }
  
@@@ -934,7 -1029,7 +934,7 @@@ static int pvrdma_pci_probe(struct pci_
        if (ret) {
                dev_err(&pdev->dev, "failed to allocate interrupts\n");
                ret = -ENOMEM;
 -              goto err_netdevice;
 +              goto err_free_cq_ring;
        }
  
        /* Allocate UAR table. */
@@@ -996,7 -1091,9 +996,7 @@@ err_free_uar_table
        pvrdma_uar_table_cleanup(dev);
  err_free_intrs:
        pvrdma_free_irq(dev);
 -      pvrdma_disable_msi_all(dev);
 -err_netdevice:
 -      unregister_netdevice_notifier(&dev->nb_netdev);
 +      pci_free_irq_vectors(pdev);
  err_free_cq_ring:
        pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
  err_free_async_ring:
@@@ -1046,7 -1143,7 +1046,7 @@@ static void pvrdma_pci_remove(struct pc
  
        pvrdma_disable_intrs(dev);
        pvrdma_free_irq(dev);
 -      pvrdma_disable_msi_all(dev);
 +      pci_free_irq_vectors(pdev);
  
        /* Deactivate pvrdma device */
        pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
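
The pvrdma interrupt rework above drops the hand-rolled MSI-X/MSI/INTx bookkeeping in favour of the pci_alloc_irq_vectors()/pci_irq_vector() API. A generic sketch of that pattern, with an arbitrary maximum of 8 vectors and example_* names that are not part of the driver:

/* Sketch: let the PCI core choose the interrupt type and vector count. */
static int example_setup_irqs(struct pci_dev *pdev, irq_handler_t handler,
                              void *ctx)
{
        int nvec, i, ret;

        nvec = pci_alloc_irq_vectors(pdev, 1, 8,
                                     PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
        if (nvec < 0)
                return nvec;

        for (i = 0; i < nvec; i++) {
                ret = request_irq(pci_irq_vector(pdev, i), handler,
                                  pdev->msix_enabled ? 0 : IRQF_SHARED,
                                  "example", ctx);
                if (ret)
                        goto err_free;
        }

        return nvec;

err_free:
        while (--i >= 0)
                free_irq(pci_irq_vector(pdev, i), ctx);
        pci_free_irq_vectors(pdev);
        return ret;
}
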
index c33a4f84413cf8ce18cbbc188621778c882ba852,2a821d2fb5690909991101f5d67f283b16b756be..78b276a90401bf04b9a29523dc9dd1ff56a7da08
@@@ -7,7 -7,7 +7,7 @@@
  #
  obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o
  
- rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o \
 -rdmavt-y := vt.o ah.o cq.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o \
 -      trace.o
++rdmavt-y := vt.o ah.o cq.o mad.o mcast.o mmap.o mr.o pd.o qp.o \
 +      rc.o srq.o trace.o
  
  CFLAGS_trace.o = -I$(src)
index c80a69b1ffcb060ebf111149e182b8a6daec5ce7,14d0ac6efd08981a7e92a507e1733bac515be122..ae30b6838d7958fa7a044535d24eb850e3607030
@@@ -120,19 -120,10 +120,19 @@@ static void rvt_deinit_mregion(struct r
        mr->mapsz = 0;
        while (i)
                kfree(mr->map[--i]);
 +      percpu_ref_exit(&mr->refcount);
 +}
 +
 +static void __rvt_mregion_complete(struct percpu_ref *ref)
 +{
 +      struct rvt_mregion *mr = container_of(ref, struct rvt_mregion,
 +                                            refcount);
 +
 +      complete(&mr->comp);
  }
  
  static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
 -                          int count)
 +                          int count, unsigned int percpu_flags)
  {
        int m, i = 0;
        struct rvt_dev_info *dev = ib_to_rvt(pd->device);
        for (; i < m; i++) {
                mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
                                          dev->dparms.node);
 -              if (!mr->map[i]) {
 -                      rvt_deinit_mregion(mr);
 -                      return -ENOMEM;
 -              }
 +              if (!mr->map[i])
 +                      goto bail;
                mr->mapsz++;
        }
        init_completion(&mr->comp);
        /* count returning the ptr to user */
 -      atomic_set(&mr->refcount, 1);
 +      if (percpu_ref_init(&mr->refcount, &__rvt_mregion_complete,
 +                          percpu_flags, GFP_KERNEL))
 +              goto bail;
 +
        atomic_set(&mr->lkey_invalid, 0);
        mr->pd = pd;
        mr->max_segs = count;
        return 0;
 +bail:
 +      rvt_deinit_mregion(mr);
 +      return -ENOMEM;
  }
  
  /**
@@@ -193,7 -180,8 +193,7 @@@ static int rvt_alloc_lkey(struct rvt_mr
                if (!tmr) {
                        rcu_assign_pointer(dev->dma_mr, mr);
                        mr->lkey_published = 1;
 -              } else {
 -                      rvt_put_mr(mr);
 +                      rvt_get_mr(mr);
                }
                goto success;
        }
@@@ -251,14 -239,11 +251,14 @@@ static void rvt_free_lkey(struct rvt_mr
        int freed = 0;
  
        spin_lock_irqsave(&rkt->lock, flags);
 -      if (!mr->lkey_published)
 -              goto out;
 -      if (lkey == 0) {
 -              RCU_INIT_POINTER(dev->dma_mr, NULL);
 +      if (!lkey) {
 +              if (mr->lkey_published) {
 +                      RCU_INIT_POINTER(dev->dma_mr, NULL);
 +                      rvt_put_mr(mr);
 +              }
        } else {
 +              if (!mr->lkey_published)
 +                      goto out;
                r = lkey >> (32 - dev->dparms.lkey_table_size);
                RCU_INIT_POINTER(rkt->table[r], NULL);
        }
@@@ -268,7 -253,7 +268,7 @@@ out
        spin_unlock_irqrestore(&rkt->lock, flags);
        if (freed) {
                synchronize_rcu();
 -              rvt_put_mr(mr);
 +              percpu_ref_kill(&mr->refcount);
        }
  }
  
@@@ -284,7 -269,7 +284,7 @@@ static struct rvt_mr *__rvt_alloc_mr(in
        if (!mr)
                goto bail;
  
 -      rval = rvt_init_mregion(&mr->mr, pd, count);
 +      rval = rvt_init_mregion(&mr->mr, pd, count, 0);
        if (rval)
                goto bail;
        /*
@@@ -309,8 -294,8 +309,8 @@@ bail
  
  static void __rvt_free_mr(struct rvt_mr *mr)
  {
 -      rvt_deinit_mregion(&mr->mr);
        rvt_free_lkey(&mr->mr);
 +      rvt_deinit_mregion(&mr->mr);
        kfree(mr);
  }
  
   * @acc: access flags
   *
   * Return: the memory region on success, otherwise returns an errno.
-  * Note that all DMA addresses should be created via the
-  * struct ib_dma_mapping_ops functions (see dma.c).
+  * Note that all DMA addresses should be created via the functions in
+  * struct dma_virt_ops.
   */
  struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
  {
                goto bail;
        }
  
 -      rval = rvt_init_mregion(&mr->mr, pd, 0);
 +      rval = rvt_init_mregion(&mr->mr, pd, 0, 0);
        if (rval) {
                ret = ERR_PTR(rval);
                goto bail;
@@@ -460,8 -445,8 +460,8 @@@ int rvt_dereg_mr(struct ib_mr *ibmr
        timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
        if (!timeout) {
                rvt_pr_err(rdi,
 -                         "rvt_dereg_mr timeout mr %p pd %p refcount %u\n",
 -                         mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
 +                         "rvt_dereg_mr timeout mr %p pd %p\n",
 +                         mr, mr->mr.pd);
                rvt_get_mr(&mr->mr);
                ret = -EBUSY;
                goto out;
@@@ -638,8 -623,7 +638,8 @@@ struct ib_fmr *rvt_alloc_fmr(struct ib_
        if (!fmr)
                goto bail;
  
 -      rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
 +      rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages,
 +                              PERCPU_REF_INIT_ATOMIC);
        if (rval)
                goto bail;
  
@@@ -690,12 -674,11 +690,12 @@@ int rvt_map_phys_fmr(struct ib_fmr *ibf
        struct rvt_fmr *fmr = to_ifmr(ibfmr);
        struct rvt_lkey_table *rkt;
        unsigned long flags;
 -      int m, n, i;
 +      int m, n;
 +      unsigned long i;
        u32 ps;
        struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);
  
 -      i = atomic_read(&fmr->mr.refcount);
 +      i = atomic_long_read(&fmr->mr.refcount.count);
        if (i > 2)
                return -EBUSY;
  
@@@ -799,7 -782,7 +799,7 @@@ int rvt_lkey_ok(struct rvt_lkey_table *
  
        /*
         * We use LKEY == zero for kernel virtual addresses
-        * (see rvt_get_dma_mr and dma.c).
+        * (see rvt_get_dma_mr() and dma_virt_ops).
         */
        rcu_read_lock();
        if (sge->lkey == 0) {
@@@ -897,7 -880,7 +897,7 @@@ int rvt_rkey_ok(struct rvt_qp *qp, stru
  
        /*
         * We use RKEY == zero for kernel virtual addresses
-        * (see rvt_get_dma_mr and dma.c).
+        * (see rvt_get_dma_mr() and dma_virt_ops).
         */
        rcu_read_lock();
        if (rkey == 0) {
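
The rdmavt mr.c changes above convert the mregion reference count from an atomic_t to a percpu_ref: percpu_ref_init() takes a release callback that fires the completion, rvt_free_lkey() kills the ref once the lkey is unpublished, and rvt_deinit_mregion() exits it. The lifecycle in isolation, with example_* names standing in for the rvt types:

/* Sketch of the percpu_ref lifecycle the mregion code now follows. */
struct example_obj {
        struct percpu_ref ref;
        struct completion done;
};

static void example_release(struct percpu_ref *ref)
{
        struct example_obj *obj = container_of(ref, struct example_obj, ref);

        complete(&obj->done);
}

static int example_init(struct example_obj *obj)
{
        init_completion(&obj->done);
        return percpu_ref_init(&obj->ref, example_release, 0, GFP_KERNEL);
}

static void example_teardown(struct example_obj *obj)
{
        percpu_ref_kill(&obj->ref);       /* no new references from here on */
        wait_for_completion(&obj->done);  /* release ran: all refs dropped */
        percpu_ref_exit(&obj->ref);
}
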
index 1165639a914bf52518eee2b3f48b7a061bd9608c,19666e52b3b198e889a432785556e43d1661f3d4..0d7c6bb551d924ea76a05c512bc966c40ceb56dc
@@@ -47,6 -47,7 +47,7 @@@
  
  #include <linux/module.h>
  #include <linux/kernel.h>
+ #include <linux/dma-mapping.h>
  #include "vt.h"
  #include "trace.h"
  
@@@ -165,7 -166,7 +166,7 @@@ static int rvt_query_port(struct ib_dev
                return -EINVAL;
  
        rvp = rdi->ports[port_index];
 -      memset(props, 0, sizeof(*props));
 +      /* props being zeroed by the caller, avoid zeroing it here */
        props->sm_lid = rvp->sm_lid;
        props->sm_sl = rvp->sm_sl;
        props->port_cap_flags = rvp->port_cap_flags;
@@@ -326,14 -327,13 +327,14 @@@ static int rvt_get_port_immutable(struc
        if (port_index < 0)
                return -EINVAL;
  
 -      err = rvt_query_port(ibdev, port_num, &attr);
 +      immutable->core_cap_flags = rdi->dparms.core_cap_flags;
 +
 +      err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
  
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 -      immutable->core_cap_flags = rdi->dparms.core_cap_flags;
        immutable->max_mad_size = rdi->dparms.max_mad_size;
  
        return 0;
@@@ -778,8 -778,7 +779,7 @@@ int rvt_register_device(struct rvt_dev_
        }
  
        /* DMA Operations */
-       rdi->ibdev.dma_ops =
-               rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops;
+       rdi->ibdev.dev.dma_ops = rdi->ibdev.dev.dma_ops ? : &dma_virt_ops;
  
        /* Protection Domain */
        spin_lock_init(&rdi->n_pds_lock);
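
rdmavt (here) and rxe (next file) replace their private DMA-op tables with the new generic dma_virt_ops, which treats kernel virtual addresses as DMA addresses for devices that never program real hardware DMA. A sketch of the core idea behind such an op table; the in-tree implementation is lib/dma-virt.c from this series.

/* Sketch: a "virtual DMA" mapping just returns the kernel address. */
static dma_addr_t example_virt_map_page(struct device *dev, struct page *page,
                                        unsigned long offset, size_t size,
                                        enum dma_data_direction dir,
                                        unsigned long attrs)
{
        return (uintptr_t)(page_address(page) + offset);
}
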
index d2e2eff7a515dd31ac5bd12cde06a80c5f806fcb,321cca6cf4cd3cc63c2dd442672f46b0f5be8d0d..5113e502f6f969013cce0ac0f524bfba385f43f8
@@@ -31,6 -31,7 +31,7 @@@
   * SOFTWARE.
   */
  
+ #include <linux/dma-mapping.h>
  #include "rxe.h"
  #include "rxe_loc.h"
  #include "rxe_queue.h"
@@@ -86,7 -87,6 +87,7 @@@ static int rxe_query_port(struct ib_dev
  
        port = &rxe->port;
  
 +      /* *attr being zeroed by the caller, avoid zeroing it here */
        *attr = port->attr;
  
        mutex_lock(&rxe->usdev_lock);
@@@ -169,7 -169,7 +170,7 @@@ static int rxe_query_pkey(struct ib_dev
        struct rxe_port *port;
  
        if (unlikely(port_num != 1)) {
-               dev_warn(device->dma_device, "invalid port_num = %d\n",
+               dev_warn(device->dev.parent, "invalid port_num = %d\n",
                         port_num);
                goto err1;
        }
        port = &rxe->port;
  
        if (unlikely(index >= port->attr.pkey_tbl_len)) {
-               dev_warn(device->dma_device, "invalid index = %d\n",
+               dev_warn(device->dev.parent, "invalid index = %d\n",
                         index);
                goto err1;
        }
@@@ -262,14 -262,13 +263,14 @@@ static int rxe_port_immutable(struct ib
        int err;
        struct ib_port_attr attr;
  
 -      err = rxe_query_port(dev, port_num, &attr);
 +      immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
 +
 +      err = ib_query_port(dev, port_num, &attr);
        if (err)
                return err;
  
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 -      immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
  
        return 0;
@@@ -1234,10 -1233,10 +1235,10 @@@ int rxe_register_device(struct rxe_dev 
        dev->node_type = RDMA_NODE_IB_CA;
        dev->phys_port_cnt = 1;
        dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
-       dev->dma_device = rxe_dma_device(rxe);
+       dev->dev.parent = rxe_dma_device(rxe);
        dev->local_dma_lkey = 0;
        dev->node_guid = rxe_node_guid(rxe);
-       dev->dma_ops = &rxe_dma_mapping_ops;
+       dev->dev.dma_ops = &dma_virt_ops;
  
        dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
        dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
index 259c59f673945bd4cbcf36a1debe9ce01bed6081,6fdfa1a7a0aeee1bdbaa7d2dc5e76c6994458fb7..d1d3fb7a6127c5d585009267d8379c401b2ea4ff
@@@ -126,7 -126,8 +126,7 @@@ int ipoib_open(struct net_device *dev
                goto err_disable;
        }
  
 -      if (ipoib_ib_dev_up(dev))
 -              goto err_stop;
 +      ipoib_ib_dev_up(dev);
  
        if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
                struct ipoib_dev_priv *cpriv;
  
        return 0;
  
 -err_stop:
 -      ipoib_ib_dev_stop(dev);
 -
  err_disable:
        clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
  
@@@ -721,14 -725,6 +721,14 @@@ int ipoib_check_sm_sendonly_fullmember_
        return ret;
  }
  
 +static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
 +{
 +      struct ipoib_pseudo_header *phdr;
 +
 +      phdr = (struct ipoib_pseudo_header *)skb_push(skb, sizeof(*phdr));
 +      memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
 +}
 +
  void ipoib_flush_paths(struct net_device *dev)
  {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@@ -955,7 -951,8 +955,7 @@@ static void neigh_add_path(struct sk_bu
                        }
                        if (skb_queue_len(&neigh->queue) <
                            IPOIB_MAX_PATH_REC_QUEUE) {
 -                              /* put pseudoheader back on for next time */
 -                              skb_push(skb, IPOIB_PSEUDO_LEN);
 +                              push_pseudo_header(skb, neigh->daddr);
                                __skb_queue_tail(&neigh->queue, skb);
                        } else {
                                ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
  
                if (!path->query && path_rec_start(dev, path))
                        goto err_path;
 -              if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
 +              if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
 +                      push_pseudo_header(skb, neigh->daddr);
                        __skb_queue_tail(&neigh->queue, skb);
 -              else
 +              } else {
                        goto err_drop;
 +              }
        }
  
        spin_unlock_irqrestore(&priv->lock, flags);
@@@ -1014,7 -1009,8 +1014,7 @@@ static void unicast_arp_send(struct sk_
                }
                if (path) {
                        if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
 -                              /* put pseudoheader back on for next time */
 -                              skb_push(skb, IPOIB_PSEUDO_LEN);
 +                              push_pseudo_header(skb, phdr->hwaddr);
                                __skb_queue_tail(&path->queue, skb);
                        } else {
                                ++dev->stats.tx_dropped;
                return;
        } else if ((path->query || !path_rec_start(dev, path)) &&
                   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
 -              /* put pseudoheader back on for next time */
 -              skb_push(skb, IPOIB_PSEUDO_LEN);
 +              push_pseudo_header(skb, phdr->hwaddr);
                __skb_queue_tail(&path->queue, skb);
        } else {
                ++dev->stats.tx_dropped;
@@@ -1127,7 -1124,8 +1127,7 @@@ send_using_neigh
        }
  
        if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
 -              /* put pseudoheader back on for next time */
 -              skb_push(skb, sizeof(*phdr));
 +              push_pseudo_header(skb, phdr->hwaddr);
                spin_lock_irqsave(&priv->lock, flags);
                __skb_queue_tail(&neigh->queue, skb);
                spin_unlock_irqrestore(&priv->lock, flags);
@@@ -1159,6 -1157,7 +1159,6 @@@ static int ipoib_hard_header(struct sk_
                             unsigned short type,
                             const void *daddr, const void *saddr, unsigned len)
  {
 -      struct ipoib_pseudo_header *phdr;
        struct ipoib_header *header;
  
        header = (struct ipoib_header *) skb_push(skb, sizeof *header);
         * destination address into skb hard header so we can figure out where
         * to send the packet later.
         */
 -      phdr = (struct ipoib_pseudo_header *) skb_push(skb, sizeof(*phdr));
 -      memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
 +      push_pseudo_header(skb, daddr);
  
        return IPOIB_HARD_LEN;
  }
@@@ -1995,7 -1995,7 +1995,7 @@@ int ipoib_add_pkey_attr(struct net_devi
        return device_create_file(&dev->dev, &dev_attr_pkey);
  }
  
 -int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
 +void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
  {
        priv->hca_caps = hca->attrs.device_cap_flags;
  
  
                priv->dev->features |= priv->dev->hw_features;
        }
 -
 -      return 0;
  }
  
  static struct net_device *ipoib_add_port(const char *format,
        if (!priv)
                goto alloc_mem_failed;
  
-       SET_NETDEV_DEV(priv->dev, hca->dma_device);
+       SET_NETDEV_DEV(priv->dev, hca->dev.parent);
        priv->dev->dev_id = port - 1;
  
        result = ib_query_port(hca, port, &attr);
                goto device_init_failed;
        }
  
 -      result = ipoib_set_dev_features(priv, hca);
 -      if (result)
 -              goto device_init_failed;
 +      ipoib_set_dev_features(priv, hca);
  
        /*
         * Set the full membership bit, so that we join the right
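
push_pseudo_header(), factored out above, stashes the destination hardware address in front of the payload so the transmit path can recover it after the stack has queued the skb. The reverse side would look roughly like the sketch below; the ipoib_pseudo_header layout (just the hardware address) is assumed from the same driver and the helper name is illustrative.

/* Sketch: pop the pseudo header the helper above pushed. */
static const u8 *example_pop_pseudo_header(struct sk_buff *skb)
{
        struct ipoib_pseudo_header *phdr =
                (struct ipoib_pseudo_header *)skb->data;

        skb_pull(skb, sizeof(*phdr));   /* data still lives inside the skb */
        return phdr->hwaddr;
}
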
index 30a6985909e0d95c446eb51abe17cf570dd18f73,da3caef1b7cb3b46a4bdce378b4614d631da08d2..5a887efb4bdf1b6405d3fc4342918084ae420b57
@@@ -651,8 -651,15 +651,8 @@@ iscsi_iser_session_create(struct iscsi_
                                                   SHOST_DIX_GUARD_CRC);
                }
  
 -              /*
 -               * Limit the sg_tablesize and max_sectors based on the device
 -               * max fastreg page list length.
 -               */
 -              shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize,
 -                      ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len);
 -
                if (iscsi_host_add(shost,
-                                  ib_conn->device->ib_device->dma_device)) {
+                                  ib_conn->device->ib_device->dev.parent)) {
                        mutex_unlock(&iser_conn->state_mutex);
                        goto free_host;
                }
        max_fr_sectors = ((shost->sg_tablesize - 1) * PAGE_SIZE) >> 9;
        shost->max_sectors = min(iser_max_sectors, max_fr_sectors);
  
 +      iser_dbg("iser_conn %p, sg_tablesize %u, max_sectors %u\n",
 +               iser_conn, shost->sg_tablesize,
 +               shost->max_sectors);
 +
        if (cmds_max > max_cmds) {
                iser_info("cmds_max changed from %u to %u\n",
                          cmds_max, max_cmds);
@@@ -994,7 -997,6 +994,7 @@@ static struct scsi_host_template iscsi_
        .change_queue_depth     = scsi_change_queue_depth,
        .sg_tablesize           = ISCSI_ISER_DEF_SG_TABLESIZE,
        .cmd_per_lun            = ISER_DEF_CMD_PER_LUN,
 +      .eh_timed_out           = iscsi_eh_cmd_timed_out,
        .eh_abort_handler       = iscsi_eh_abort,
        .eh_device_reset_handler= iscsi_eh_device_reset,
        .eh_target_reset_handler = iscsi_eh_recover_target,
index 3c7fa972a38cbc37437695820d3e337b123c8f05,64b33d69b8741cac8a29608f6e5ce19b368d5182..cee46266f434850e6a9533615d812b4c610d5c13
@@@ -40,7 -40,6 +40,7 @@@
  #include <linux/parser.h>
  #include <linux/random.h>
  #include <linux/jiffies.h>
 +#include <linux/lockdep.h>
  #include <rdma/ib_cache.h>
  
  #include <linux/atomic.h>
@@@ -466,13 -465,9 +466,13 @@@ static struct srp_fr_pool *srp_alloc_fr
   * completion handler can access the queue pair while it is
   * being destroyed.
   */
 -static void srp_destroy_qp(struct ib_qp *qp)
 +static void srp_destroy_qp(struct srp_rdma_ch *ch, struct ib_qp *qp)
  {
 -      ib_drain_rq(qp);
 +      spin_lock_irq(&ch->lock);
 +      ib_process_cq_direct(ch->send_cq, -1);
 +      spin_unlock_irq(&ch->lock);
 +
 +      ib_drain_qp(qp);
        ib_destroy_qp(qp);
  }
  
@@@ -546,7 -541,7 +546,7 @@@ static int srp_create_ch_ib(struct srp_
        }
  
        if (ch->qp)
 -              srp_destroy_qp(ch->qp);
 +              srp_destroy_qp(ch, ch->qp);
        if (ch->recv_cq)
                ib_free_cq(ch->recv_cq);
        if (ch->send_cq)
        return 0;
  
  err_qp:
 -      srp_destroy_qp(qp);
 +      srp_destroy_qp(ch, qp);
  
  err_send_cq:
        ib_free_cq(send_cq);
@@@ -613,7 -608,7 +613,7 @@@ static void srp_free_ch_ib(struct srp_t
                        ib_destroy_fmr_pool(ch->fmr_pool);
        }
  
 -      srp_destroy_qp(ch->qp);
 +      srp_destroy_qp(ch, ch->qp);
        ib_free_cq(ch->send_cq);
        ib_free_cq(ch->recv_cq);
  
@@@ -1804,8 -1799,6 +1804,8 @@@ static struct srp_iu *__srp_get_tx_iu(s
        s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
        struct srp_iu *iu;
  
 +      lockdep_assert_held(&ch->lock);
 +
        ib_process_cq_direct(ch->send_cq, -1);
  
        if (list_empty(&ch->free_tx))
        return iu;
  }
  
 +/*
 + * Note: if this function is called from inside ib_drain_sq() then it will
 + * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
 + * with status IB_WC_SUCCESS then that's a bug.
 + */
  static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
  {
        struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
                return;
        }
  
 +      lockdep_assert_held(&ch->lock);
 +
        list_add(&iu->list, &ch->free_tx);
  }
  
@@@ -1898,24 -1884,17 +1898,24 @@@ static void srp_process_rsp(struct srp_
        if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
                spin_lock_irqsave(&ch->lock, flags);
                ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
 +              if (rsp->tag == ch->tsk_mgmt_tag) {
 +                      ch->tsk_mgmt_status = -1;
 +                      if (be32_to_cpu(rsp->resp_data_len) >= 4)
 +                              ch->tsk_mgmt_status = rsp->data[3];
 +                      complete(&ch->tsk_mgmt_done);
 +              } else {
 +                      shost_printk(KERN_ERR, target->scsi_host,
 +                                   "Received tsk mgmt response too late for tag %#llx\n",
 +                                   rsp->tag);
 +              }
                spin_unlock_irqrestore(&ch->lock, flags);
 -
 -              ch->tsk_mgmt_status = -1;
 -              if (be32_to_cpu(rsp->resp_data_len) >= 4)
 -                      ch->tsk_mgmt_status = rsp->data[3];
 -              complete(&ch->tsk_mgmt_done);
        } else {
                scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
 -              if (scmnd) {
 +              if (scmnd && scmnd->host_scribble) {
                        req = (void *)scmnd->host_scribble;
                        scmnd = srp_claim_req(ch, req, NULL, scmnd);
 +              } else {
 +                      scmnd = NULL;
                }
                if (!scmnd) {
                        shost_printk(KERN_ERR, target->scsi_host,
@@@ -2547,18 -2526,19 +2547,18 @@@ srp_change_queue_depth(struct scsi_devi
  }
  
  static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
 -                           u8 func)
 +                           u8 func, u8 *status)
  {
        struct srp_target_port *target = ch->target;
        struct srp_rport *rport = target->rport;
        struct ib_device *dev = target->srp_host->srp_dev->dev;
        struct srp_iu *iu;
        struct srp_tsk_mgmt *tsk_mgmt;
 +      int res;
  
        if (!ch->connected || target->qp_in_error)
                return -1;
  
 -      init_completion(&ch->tsk_mgmt_done);
 -
        /*
         * Lock the rport mutex to avoid that srp_create_ch_ib() is
         * invoked while a task management function is being sent.
  
        tsk_mgmt->opcode        = SRP_TSK_MGMT;
        int_to_scsilun(lun, &tsk_mgmt->lun);
 -      tsk_mgmt->tag           = req_tag | SRP_TAG_TSK_MGMT;
        tsk_mgmt->tsk_mgmt_func = func;
        tsk_mgmt->task_tag      = req_tag;
  
 +      spin_lock_irq(&ch->lock);
 +      ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
 +      tsk_mgmt->tag = ch->tsk_mgmt_tag;
 +      spin_unlock_irq(&ch->lock);
 +
 +      init_completion(&ch->tsk_mgmt_done);
 +
        ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
                                      DMA_TO_DEVICE);
        if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
  
                return -1;
        }
 +      res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
 +                                      msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
 +      if (res > 0 && status)
 +              *status = ch->tsk_mgmt_status;
        mutex_unlock(&rport->mutex);
  
 -      if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
 -                                       msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
 -              return -1;
 +      WARN_ON_ONCE(res < 0);
  
 -      return 0;
 +      return res > 0 ? 0 : -1;
  }
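	/* Note on the return handling above: wait_for_completion_timeout()
	 * returns 0 on timeout and the number of jiffies left (at least 1)
	 * when the completion fired, so res > 0 means the task-management
	 * response arrived within SRP_ABORT_TIMEOUT_MS and *status, copied
	 * from ch->tsk_mgmt_status while the rport mutex is still held, is
	 * valid. */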
  
  static int srp_abort(struct scsi_cmnd *scmnd)
        shost_printk(KERN_ERR, target->scsi_host,
                     "Sending SRP abort for tag %#x\n", tag);
        if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
 -                            SRP_TSK_ABORT_TASK) == 0)
 +                            SRP_TSK_ABORT_TASK, NULL) == 0)
                ret = SUCCESS;
        else if (target->rport->state == SRP_RPORT_LOST)
                ret = FAST_IO_FAIL;
@@@ -2651,15 -2623,14 +2651,15 @@@ static int srp_reset_device(struct scsi
        struct srp_target_port *target = host_to_target(scmnd->device->host);
        struct srp_rdma_ch *ch;
        int i;
 +      u8 status;
  
        shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
  
        ch = &target->ch[0];
        if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
 -                            SRP_TSK_LUN_RESET))
 +                            SRP_TSK_LUN_RESET, &status))
                return FAILED;
 -      if (ch->tsk_mgmt_status)
 +      if (status)
                return FAILED;
  
        for (i = 0; i < target->ch_count; i++) {
@@@ -2688,8 -2659,9 +2688,8 @@@ static int srp_slave_alloc(struct scsi_
        struct Scsi_Host *shost = sdev->host;
        struct srp_target_port *target = host_to_target(shost);
        struct srp_device *srp_dev = target->srp_host->srp_dev;
 -      struct ib_device *ibdev = srp_dev->dev;
  
 -      if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
 +      if (true)
                blk_queue_virt_boundary(sdev->request_queue,
                                        ~srp_dev->mr_page_mask);
  
@@@ -2892,7 -2864,6 +2892,7 @@@ static struct scsi_host_template srp_te
        .info                           = srp_target_info,
        .queuecommand                   = srp_queuecommand,
        .change_queue_depth             = srp_change_queue_depth,
 +      .eh_timed_out                   = srp_timed_out,
        .eh_abort_handler               = srp_abort,
        .eh_device_reset_handler        = srp_reset_device,
        .eh_host_reset_handler          = srp_reset_host,
@@@ -2933,7 -2904,7 +2933,7 @@@ static int srp_add_target(struct srp_ho
        sprintf(target->target_name, "SRP.T10:%016llX",
                be64_to_cpu(target->id_ext));
  
-       if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
+       if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
                return -ENODEV;
  
        memcpy(ids.port_id, &target->id_ext, 8);
@@@ -3445,12 -3416,11 +3445,12 @@@ static ssize_t srp_create_target(struc
                        ret = srp_connect_ch(ch, multich);
                        if (ret) {
                                shost_printk(KERN_ERR, target->scsi_host,
 -                                           PFX "Connection %d/%d failed\n",
 +                                           PFX "Connection %d/%d to %pI6 failed\n",
                                             ch_start + cpu_idx,
 -                                           target->ch_count);
 +                                           target->ch_count,
 +                                           ch->target->orig_dgid.raw);
                                if (node_idx == 0 && cpu_idx == 0) {
 -                                      goto err_disconnect;
 +                                      goto free_ch;
                                } else {
                                        srp_free_ch_ib(target, ch);
                                        srp_free_req_data(target, ch);
@@@ -3497,7 -3467,6 +3497,7 @@@ put
  err_disconnect:
        srp_disconnect_target(target);
  
 +free_ch:
        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                srp_free_ch_ib(target, ch);
@@@ -3546,7 -3515,7 +3546,7 @@@ static struct srp_host *srp_add_port(st
        host->port = port;
  
        host->dev.class = &srp_class;
-       host->dev.parent = device->dev->dma_device;
+       host->dev.parent = device->dev->dev.parent;
        dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
  
        if (device_register(&host->dev))
@@@ -3725,12 -3694,6 +3725,12 @@@ static int __init srp_init_module(void
                indirect_sg_entries = cmd_sg_entries;
        }
  
 +      if (indirect_sg_entries > SG_MAX_SEGMENTS) {
 +              pr_warn("Clamping indirect_sg_entries to %u\n",
 +                      SG_MAX_SEGMENTS);
 +              indirect_sg_entries = SG_MAX_SEGMENTS;
 +      }
 +
        srp_remove_wq = create_workqueue("srp_remove");
        if (!srp_remove_wq) {
                ret = -ENOMEM;
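A short note on the srp_destroy_qp() rework earlier in this file: the hunks
establish the teardown order sketched below (a restatement of the diff, not
new behaviour), which is what allows srp_send_done() to assert that ch->lock
is held on the success path while still being reachable, for flush errors
only, from ib_drain_qp() without the lock.

	/* Sketch mirroring srp_destroy_qp() above. */
	static void example_destroy_qp(struct srp_rdma_ch *ch, struct ib_qp *qp)
	{
		spin_lock_irq(&ch->lock);
		ib_process_cq_direct(ch->send_cq, -1);	/* reap good sends under the lock */
		spin_unlock_irq(&ch->lock);

		ib_drain_qp(qp);	/* remaining WQEs complete as flush errors */
		ib_destroy_qp(qp);
	}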
diff --combined drivers/infiniband/ulp/srpt/ib_srpt.c
index bc5a2d86ae7ea1de425dfcc204fa71ed576c17a8,147beadd6815be0d971d5cdce6fe33565f73d1e4..7e314c2f207162f51b8c1ab78c6f0367b9025321
@@@ -500,7 -500,6 +500,7 @@@ static int srpt_refresh_port(struct srp
        struct ib_mad_reg_req reg_req;
        struct ib_port_modify port_modify;
        struct ib_port_attr port_attr;
 +      __be16 *guid;
        int ret;
  
        memset(&port_modify, 0, sizeof(port_modify));
        if (ret)
                goto err_query_port;
  
 +      sport->port_guid_wwn.priv = sport;
 +      guid = (__be16 *)&sport->gid.global.interface_id;
        snprintf(sport->port_guid, sizeof(sport->port_guid),
 -              "0x%016llx%016llx",
 -              be64_to_cpu(sport->gid.global.subnet_prefix),
 -              be64_to_cpu(sport->gid.global.interface_id));
 +               "%04x:%04x:%04x:%04x",
 +               be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
 +               be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
 +      sport->port_gid_wwn.priv = sport;
 +      snprintf(sport->port_gid, sizeof(sport->port_gid),
 +               "0x%016llx%016llx",
 +               be64_to_cpu(sport->gid.global.subnet_prefix),
 +               be64_to_cpu(sport->gid.global.interface_id));
  
        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof(reg_req));
@@@ -1846,7 -1838,6 +1846,7 @@@ static int srpt_cm_req_recv(struct ib_c
        struct srp_login_rej *rej;
        struct ib_cm_rep_param *rep_param;
        struct srpt_rdma_ch *ch, *tmp_ch;
 +      __be16 *guid;
        u32 it_iu_len;
        int i, ret = 0;
  
                goto destroy_ib;
        }
  
 -      /*
 -       * Use the initator port identifier as the session name, when
 -       * checking against se_node_acl->initiatorname[] this can be
 -       * with or without preceeding '0x'.
 -       */
 +      guid = (__be16 *)&param->primary_path->sgid.global.interface_id;
 +      snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x",
 +               be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
 +               be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
        snprintf(ch->sess_name, sizeof(ch->sess_name), "0x%016llx%016llx",
                        be64_to_cpu(*(__be64 *)ch->i_port_id),
                        be64_to_cpu(*(__be64 *)(ch->i_port_id + 8)));
  
        pr_debug("registering session %s\n", ch->sess_name);
  
 -      ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
 +      if (sport->port_guid_tpg.se_tpg_wwn)
 +              ch->sess = target_alloc_session(&sport->port_guid_tpg, 0, 0,
 +                                              TARGET_PROT_NORMAL,
 +                                              ch->ini_guid, ch, NULL);
 +      if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
 +              ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
                                        TARGET_PROT_NORMAL, ch->sess_name, ch,
                                        NULL);
        /* Retry without leading "0x" */
 -      if (IS_ERR(ch->sess))
 -              ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
 +      if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
 +              ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
                                                TARGET_PROT_NORMAL,
                                                ch->sess_name + 2, ch, NULL);
 -      if (IS_ERR(ch->sess)) {
 +      if (IS_ERR_OR_NULL(ch->sess)) {
                pr_info("Rejected login because no ACL has been configured yet for initiator %s.\n",
                        ch->sess_name);
                rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ?
@@@ -2433,7 -2420,7 +2433,7 @@@ static int srpt_release_sdev(struct srp
        return 0;
  }
  
 -static struct srpt_port *__srpt_lookup_port(const char *name)
 +static struct se_wwn *__srpt_lookup_wwn(const char *name)
  {
        struct ib_device *dev;
        struct srpt_device *sdev;
                for (i = 0; i < dev->phys_port_cnt; i++) {
                        sport = &sdev->port[i];
  
 -                      if (!strcmp(sport->port_guid, name))
 -                              return sport;
 +                      if (strcmp(sport->port_guid, name) == 0)
 +                              return &sport->port_guid_wwn;
 +                      if (strcmp(sport->port_gid, name) == 0)
 +                              return &sport->port_gid_wwn;
                }
        }
  
        return NULL;
  }
  
 -static struct srpt_port *srpt_lookup_port(const char *name)
 +static struct se_wwn *srpt_lookup_wwn(const char *name)
  {
 -      struct srpt_port *sport;
 +      struct se_wwn *wwn;
  
        spin_lock(&srpt_dev_lock);
 -      sport = __srpt_lookup_port(name);
 +      wwn = __srpt_lookup_wwn(name);
        spin_unlock(&srpt_dev_lock);
  
 -      return sport;
 +      return wwn;
  }
  
  /**
@@@ -2479,8 -2464,7 +2479,7 @@@ static void srpt_add_one(struct ib_devi
        struct ib_srq_init_attr srq_attr;
        int i;
  
-       pr_debug("device = %p, device->dma_ops = %p\n", device,
-                device->dma_ops);
+       pr_debug("device = %p\n", device);
  
        sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
        if (!sdev)
@@@ -2658,19 -2642,11 +2657,19 @@@ static char *srpt_get_fabric_name(void
        return "srpt";
  }
  
 +static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg)
 +{
 +      return tpg->se_tpg_wwn->priv;
 +}
 +
  static char *srpt_get_fabric_wwn(struct se_portal_group *tpg)
  {
 -      struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1);
 +      struct srpt_port *sport = srpt_tpg_to_sport(tpg);
  
 -      return sport->port_guid;
 +      WARN_ON_ONCE(tpg != &sport->port_guid_tpg &&
 +                   tpg != &sport->port_gid_tpg);
 +      return tpg == &sport->port_guid_tpg ? sport->port_guid :
 +              sport->port_gid;
  }
  
  static u16 srpt_get_tag(struct se_portal_group *tpg)
@@@ -2760,19 -2736,6 +2759,19 @@@ static int srpt_get_tcm_cmd_state(struc
        return srpt_get_cmd_state(ioctx);
  }
  
 +static int srpt_parse_guid(u64 *guid, const char *name)
 +{
 +      u16 w[4];
 +      int ret = -EINVAL;
 +
 +      if (sscanf(name, "%hx:%hx:%hx:%hx", &w[0], &w[1], &w[2], &w[3]) != 4)
 +              goto out;
 +      *guid = get_unaligned_be64(w);
 +      ret = 0;
 +out:
 +      return ret;
 +}
 +
  /**
   * srpt_parse_i_port_id() - Parse an initiator port ID.
   * @name: ASCII representation of a 128-bit initiator port ID.
   */
  static int srpt_init_nodeacl(struct se_node_acl *se_nacl, const char *name)
  {
 +      u64 guid;
        u8 i_port_id[16];
 +      int ret;
  
 -      if (srpt_parse_i_port_id(i_port_id, name) < 0) {
 +      ret = srpt_parse_guid(&guid, name);
 +      if (ret < 0)
 +              ret = srpt_parse_i_port_id(i_port_id, name);
 +      if (ret < 0)
                pr_err("invalid initiator port ID %s\n", name);
 -              return -EINVAL;
 -      }
 -      return 0;
 +      return ret;
  }
  
  static ssize_t srpt_tpg_attrib_srp_max_rdma_size_show(struct config_item *item,
                char *page)
  {
        struct se_portal_group *se_tpg = attrib_to_tpg(item);
 -      struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
 +      struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
  
        return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size);
  }
@@@ -2833,7 -2793,7 +2832,7 @@@ static ssize_t srpt_tpg_attrib_srp_max_
                const char *page, size_t count)
  {
        struct se_portal_group *se_tpg = attrib_to_tpg(item);
 -      struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
 +      struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
        unsigned long val;
        int ret;
  
@@@ -2861,7 -2821,7 +2860,7 @@@ static ssize_t srpt_tpg_attrib_srp_max_
                char *page)
  {
        struct se_portal_group *se_tpg = attrib_to_tpg(item);
 -      struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
 +      struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
  
        return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size);
  }
@@@ -2870,7 -2830,7 +2869,7 @@@ static ssize_t srpt_tpg_attrib_srp_max_
                const char *page, size_t count)
  {
        struct se_portal_group *se_tpg = attrib_to_tpg(item);
 -      struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
 +      struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
        unsigned long val;
        int ret;
  
@@@ -2898,7 -2858,7 +2897,7 @@@ static ssize_t srpt_tpg_attrib_srp_sq_s
                char *page)
  {
        struct se_portal_group *se_tpg = attrib_to_tpg(item);
 -      struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
 +      struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
  
        return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size);
  }
@@@ -2907,7 -2867,7 +2906,7 @@@ static ssize_t srpt_tpg_attrib_srp_sq_s
                const char *page, size_t count)
  {
        struct se_portal_group *se_tpg = attrib_to_tpg(item);
 -      struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
 +      struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
        unsigned long val;
        int ret;
  
@@@ -2945,7 -2905,7 +2944,7 @@@ static struct configfs_attribute *srpt_
  static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page)
  {
        struct se_portal_group *se_tpg = to_tpg(item);
 -      struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
 +      struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
  
        return snprintf(page, PAGE_SIZE, "%d\n", (sport->enabled) ? 1: 0);
  }
@@@ -2954,7 -2914,7 +2953,7 @@@ static ssize_t srpt_tpg_enable_store(st
                const char *page, size_t count)
  {
        struct se_portal_group *se_tpg = to_tpg(item);
 -      struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
 +      struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
        struct srpt_device *sdev = sport->sdev;
        struct srpt_rdma_ch *ch;
        unsigned long tmp;
@@@ -3006,19 -2966,15 +3005,19 @@@ static struct se_portal_group *srpt_mak
                                             struct config_group *group,
                                             const char *name)
  {
 -      struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
 +      struct srpt_port *sport = wwn->priv;
 +      static struct se_portal_group *tpg;
        int res;
  
 -      /* Initialize sport->port_wwn and sport->port_tpg_1 */
 -      res = core_tpg_register(&sport->port_wwn, &sport->port_tpg_1, SCSI_PROTOCOL_SRP);
 +      WARN_ON_ONCE(wwn != &sport->port_guid_wwn &&
 +                   wwn != &sport->port_gid_wwn);
 +      tpg = wwn == &sport->port_guid_wwn ? &sport->port_guid_tpg :
 +              &sport->port_gid_tpg;
 +      res = core_tpg_register(wwn, tpg, SCSI_PROTOCOL_SRP);
        if (res)
                return ERR_PTR(res);
  
 -      return &sport->port_tpg_1;
 +      return tpg;
  }
  
  /**
   */
  static void srpt_drop_tpg(struct se_portal_group *tpg)
  {
 -      struct srpt_port *sport = container_of(tpg,
 -                              struct srpt_port, port_tpg_1);
 +      struct srpt_port *sport = srpt_tpg_to_sport(tpg);
  
        sport->enabled = false;
 -      core_tpg_deregister(&sport->port_tpg_1);
 +      core_tpg_deregister(tpg);
  }
  
  /**
@@@ -3041,7 -2998,19 +3040,7 @@@ static struct se_wwn *srpt_make_tport(s
                                      struct config_group *group,
                                      const char *name)
  {
 -      struct srpt_port *sport;
 -      int ret;
 -
 -      sport = srpt_lookup_port(name);
 -      pr_debug("make_tport(%s)\n", name);
 -      ret = -EINVAL;
 -      if (!sport)
 -              goto err;
 -
 -      return &sport->port_wwn;
 -
 -err:
 -      return ERR_PTR(ret);
 +      return srpt_lookup_wwn(name) ? : ERR_PTR(-EINVAL);
  }
  
  /**
   */
  static void srpt_drop_tport(struct se_wwn *wwn)
  {
 -      struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
 -
 -      pr_debug("drop_tport(%s\n", config_item_name(&sport->port_wwn.wwn_group.cg_item));
  }
  
  static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf)
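The ib_srpt hunks above add a GUID-based WWN and session name alongside the
existing GID-based one; the new name format is the interface_id half of a GID
printed as four 16-bit hex groups. A small illustrative fragment of that
formatting (buffer and variable names are hypothetical, not from the patch):

	/* Format a port or initiator GUID the way the new srpt naming does,
	 * e.g. "0002:c903:0010:e7c4" (value purely illustrative). */
	__be16 *guid = (__be16 *)&gid.global.interface_id;
	char name[20];	/* "xxxx:xxxx:xxxx:xxxx" plus NUL */

	snprintf(name, sizeof(name), "%04x:%04x:%04x:%04x",
		 be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
		 be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));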
diff --combined drivers/iommu/amd_iommu.c
index 09bd3b290bb852354b0fbaef76d6710c13dfc885,f7b86679bafec3f6f10b3fd95b9f6f0aa111c66c..98940d1392cb0cd19d648b6a25f2a5ba36c052d4
@@@ -112,12 -112,12 +112,12 @@@ static struct timer_list queue_timer
   * Domain for untranslated devices - only allocated
   * if iommu=pt passed on kernel cmd line.
   */
 -static const struct iommu_ops amd_iommu_ops;
 +const struct iommu_ops amd_iommu_ops;
  
  static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
  int amd_iommu_max_glx_val = -1;
  
- static struct dma_map_ops amd_iommu_dma_ops;
+ static const struct dma_map_ops amd_iommu_dma_ops;
  
  /*
   * This struct contains device specific data for the IOMMU
@@@ -445,7 -445,6 +445,7 @@@ static void init_iommu_group(struct dev
  static int iommu_init_device(struct device *dev)
  {
        struct iommu_dev_data *dev_data;
 +      struct amd_iommu *iommu;
        int devid;
  
        if (dev->archdata.iommu)
        if (devid < 0)
                return devid;
  
 +      iommu = amd_iommu_rlookup_table[devid];
 +
        dev_data = find_dev_data(devid);
        if (!dev_data)
                return -ENOMEM;
  
        dev->archdata.iommu = dev_data;
  
 -      iommu_device_link(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev,
 -                        dev);
 +      iommu_device_link(&iommu->iommu, dev);
  
        return 0;
  }
@@@ -497,16 -495,13 +497,16 @@@ static void iommu_ignore_device(struct 
  
  static void iommu_uninit_device(struct device *dev)
  {
 -      int devid;
        struct iommu_dev_data *dev_data;
 +      struct amd_iommu *iommu;
 +      int devid;
  
        devid = get_device_id(dev);
        if (devid < 0)
                return;
  
 +      iommu = amd_iommu_rlookup_table[devid];
 +
        dev_data = search_dev_data(devid);
        if (!dev_data)
                return;
        if (dev_data->domain)
                detach_device(dev);
  
 -      iommu_device_unlink(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev,
 -                          dev);
 +      iommu_device_unlink(&iommu->iommu, dev);
  
        iommu_group_remove_device(dev);
  
        /* Remove dma-ops */
-       dev->archdata.dma_ops = NULL;
+       dev->dma_ops = NULL;
  
        /*
         * We keep dev_data around for unplugged devices and reuse it when the
@@@ -2168,7 -2164,7 +2168,7 @@@ static int amd_iommu_add_device(struct 
                                dev_name(dev));
  
                iommu_ignore_device(dev);
-               dev->archdata.dma_ops = &nommu_dma_ops;
+               dev->dma_ops = &nommu_dma_ops;
                goto out;
        }
        init_iommu_group(dev);
        if (domain->type == IOMMU_DOMAIN_IDENTITY)
                dev_data->passthrough = true;
        else
-               dev->archdata.dma_ops = &amd_iommu_dma_ops;
+               dev->dma_ops = &amd_iommu_dma_ops;
  
  out:
        iommu_completion_wait(iommu);
@@@ -2672,7 -2668,7 +2672,7 @@@ static void *alloc_coherent(struct devi
                        return NULL;
  
                page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
 -                                               get_order(size));
 +                                               get_order(size), flag);
                if (!page)
                        return NULL;
        }
@@@ -2732,7 -2728,7 +2732,7 @@@ static int amd_iommu_dma_supported(stru
        return check_device(dev);
  }
  
- static struct dma_map_ops amd_iommu_dma_ops = {
+ static const struct dma_map_ops amd_iommu_dma_ops = {
        .alloc          = alloc_coherent,
        .free           = free_coherent,
        .map_page       = map_page,
@@@ -3165,10 -3161,9 +3165,10 @@@ static bool amd_iommu_capable(enum iomm
        return false;
  }
  
 -static void amd_iommu_get_dm_regions(struct device *dev,
 -                                   struct list_head *head)
 +static void amd_iommu_get_resv_regions(struct device *dev,
 +                                     struct list_head *head)
  {
 +      struct iommu_resv_region *region;
        struct unity_map_entry *entry;
        int devid;
  
                return;
  
        list_for_each_entry(entry, &amd_iommu_unity_map, list) {
 -              struct iommu_dm_region *region;
 +              size_t length;
 +              int prot = 0;
  
                if (devid < entry->devid_start || devid > entry->devid_end)
                        continue;
  
 -              region = kzalloc(sizeof(*region), GFP_KERNEL);
 +              length = entry->address_end - entry->address_start;
 +              if (entry->prot & IOMMU_PROT_IR)
 +                      prot |= IOMMU_READ;
 +              if (entry->prot & IOMMU_PROT_IW)
 +                      prot |= IOMMU_WRITE;
 +
 +              region = iommu_alloc_resv_region(entry->address_start,
 +                                               length, prot,
 +                                               IOMMU_RESV_DIRECT);
                if (!region) {
                        pr_err("Out of memory allocating dm-regions for %s\n",
                                dev_name(dev));
                        return;
                }
 -
 -              region->start = entry->address_start;
 -              region->length = entry->address_end - entry->address_start;
 -              if (entry->prot & IOMMU_PROT_IR)
 -                      region->prot |= IOMMU_READ;
 -              if (entry->prot & IOMMU_PROT_IW)
 -                      region->prot |= IOMMU_WRITE;
 -
                list_add_tail(&region->list, head);
        }
 +
 +      region = iommu_alloc_resv_region(MSI_RANGE_START,
 +                                       MSI_RANGE_END - MSI_RANGE_START + 1,
 +                                       0, IOMMU_RESV_RESERVED);
 +      if (!region)
 +              return;
 +      list_add_tail(&region->list, head);
 +
 +      region = iommu_alloc_resv_region(HT_RANGE_START,
 +                                       HT_RANGE_END - HT_RANGE_START + 1,
 +                                       0, IOMMU_RESV_RESERVED);
 +      if (!region)
 +              return;
 +      list_add_tail(&region->list, head);
  }
  
 -static void amd_iommu_put_dm_regions(struct device *dev,
 +static void amd_iommu_put_resv_regions(struct device *dev,
                                     struct list_head *head)
  {
 -      struct iommu_dm_region *entry, *next;
 +      struct iommu_resv_region *entry, *next;
  
        list_for_each_entry_safe(entry, next, head, list)
                kfree(entry);
  }
  
 -static void amd_iommu_apply_dm_region(struct device *dev,
 +static void amd_iommu_apply_resv_region(struct device *dev,
                                      struct iommu_domain *domain,
 -                                    struct iommu_dm_region *region)
 +                                    struct iommu_resv_region *region)
  {
        struct dma_ops_domain *dma_dom = to_dma_ops_domain(to_pdomain(domain));
        unsigned long start, end;
        WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
  }
  
 -static const struct iommu_ops amd_iommu_ops = {
 +const struct iommu_ops amd_iommu_ops = {
        .capable = amd_iommu_capable,
        .domain_alloc = amd_iommu_domain_alloc,
        .domain_free  = amd_iommu_domain_free,
        .add_device = amd_iommu_add_device,
        .remove_device = amd_iommu_remove_device,
        .device_group = amd_iommu_device_group,
 -      .get_dm_regions = amd_iommu_get_dm_regions,
 -      .put_dm_regions = amd_iommu_put_dm_regions,
 -      .apply_dm_region = amd_iommu_apply_dm_region,
 +      .get_resv_regions = amd_iommu_get_resv_regions,
 +      .put_resv_regions = amd_iommu_put_resv_regions,
 +      .apply_resv_region = amd_iommu_apply_resv_region,
        .pgsize_bitmap  = AMD_IOMMU_PGSIZES,
  };
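Two mechanical themes in the amd_iommu hunks deserve a note: every
dma_map_ops table becomes const, and the per-device ops pointer moves from
dev->archdata.dma_ops to the new dev->dma_ops member. The series also adds a
set_dma_ops() helper (see the dma-mapping.h hunk further down), so the direct
assignments above could equivalently be spelled through it; a sketch, where
"translated" is a hypothetical flag standing in for the driver's own checks:

	static void example_pick_dma_ops(struct device *dev, bool translated)
	{
		if (translated)
			set_dma_ops(dev, &amd_iommu_dma_ops);
		else
			set_dma_ops(dev, &nommu_dma_ops);
	}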
  
diff --combined drivers/nvme/host/rdma.c
index 49b2121af689d37509ad965fa2c7a8851b48fefe,4fc7aa86fe3a14a0a5aa6a5c98897f32e4e49c12..bc20a2442a04256dcdfeffd882bda9043b314bfc
  
  #define NVME_RDMA_MAX_INLINE_SEGMENTS 1
  
 -static const char *const nvme_rdma_cm_status_strs[] = {
 -      [NVME_RDMA_CM_INVALID_LEN]      = "invalid length",
 -      [NVME_RDMA_CM_INVALID_RECFMT]   = "invalid record format",
 -      [NVME_RDMA_CM_INVALID_QID]      = "invalid queue ID",
 -      [NVME_RDMA_CM_INVALID_HSQSIZE]  = "invalid host SQ size",
 -      [NVME_RDMA_CM_INVALID_HRQSIZE]  = "invalid host RQ size",
 -      [NVME_RDMA_CM_NO_RSC]           = "resource not found",
 -      [NVME_RDMA_CM_INVALID_IRD]      = "invalid IRD",
 -      [NVME_RDMA_CM_INVALID_ORD]      = "Invalid ORD",
 -};
 -
 -static const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
 -{
 -      size_t index = status;
 -
 -      if (index < ARRAY_SIZE(nvme_rdma_cm_status_strs) &&
 -          nvme_rdma_cm_status_strs[index])
 -              return nvme_rdma_cm_status_strs[index];
 -      else
 -              return "unrecognized reason";
 -};
 -
  /*
   * We handle AEN commands ourselves and don't even let the
   * block layer know about them.
@@@ -133,10 -155,6 +133,10 @@@ struct nvme_rdma_ctrl 
                struct sockaddr addr;
                struct sockaddr_in addr_in;
        };
 +      union {
 +              struct sockaddr src_addr;
 +              struct sockaddr_in src_addr_in;
 +      };
  
        struct nvme_ctrl        ctrl;
  };
@@@ -549,7 -567,6 +549,7 @@@ static int nvme_rdma_init_queue(struct 
                int idx, size_t queue_size)
  {
        struct nvme_rdma_queue *queue;
 +      struct sockaddr *src_addr = NULL;
        int ret;
  
        queue = &ctrl->queues[idx];
        }
  
        queue->cm_error = -ETIMEDOUT;
 -      ret = rdma_resolve_addr(queue->cm_id, NULL, &ctrl->addr,
 +      if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
 +              src_addr = &ctrl->src_addr;
 +
 +      ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr,
                        NVME_RDMA_CONNECT_TIMEOUT_MS);
        if (ret) {
                dev_info(ctrl->ctrl.device,
@@@ -967,7 -981,8 +967,7 @@@ static int nvme_rdma_map_sg_fr(struct n
  }
  
  static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 -              struct request *rq, unsigned int map_len,
 -              struct nvme_command *c)
 +              struct request *rq, struct nvme_command *c)
  {
        struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
        struct nvme_rdma_device *dev = queue->device;
        }
  
        if (count == 1) {
 -              if (rq_data_dir(rq) == WRITE &&
 -                  map_len <= nvme_rdma_inline_data_size(queue) &&
 -                  nvme_rdma_queue_idx(queue))
 +              if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
 +                  blk_rq_payload_bytes(rq) <=
 +                              nvme_rdma_inline_data_size(queue))
                        return nvme_rdma_map_sg_inline(queue, req, c);
  
                if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
@@@ -1251,7 -1266,7 +1251,7 @@@ static int nvme_rdma_addr_resolved(stru
  
        dev = nvme_rdma_find_get_device(queue->cm_id);
        if (!dev) {
-               dev_err(queue->cm_id->device->dma_device,
+               dev_err(queue->cm_id->device->dev.parent,
                        "no client data found!\n");
                return -ECONNREFUSED;
        }
@@@ -1407,9 -1422,9 +1407,9 @@@ static inline bool nvme_rdma_queue_is_r
                struct request *rq)
  {
        if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
 -              struct nvme_command *cmd = (struct nvme_command *)rq->cmd;
 +              struct nvme_command *cmd = nvme_req(rq)->cmd;
  
 -              if (rq->cmd_type != REQ_TYPE_DRV_PRIV ||
 +              if (!blk_rq_is_passthrough(rq) ||
                    cmd->common.opcode != nvme_fabrics_command ||
                    cmd->fabrics.fctype != nvme_fabrics_type_connect)
                        return false;
@@@ -1429,6 -1444,7 +1429,6 @@@ static int nvme_rdma_queue_rq(struct bl
        struct nvme_command *c = sqe->data;
        bool flush = false;
        struct ib_device *dev;
 -      unsigned int map_len;
        int ret;
  
        WARN_ON_ONCE(rq->tag < 0);
  
        blk_mq_start_request(rq);
  
 -      map_len = nvme_map_len(rq);
 -      ret = nvme_rdma_map_data(queue, rq, map_len, c);
 +      ret = nvme_rdma_map_data(queue, rq, c);
        if (ret < 0) {
                dev_err(queue->ctrl->ctrl.device,
                             "Failed to map data (%d)\n", ret);
        ib_dma_sync_single_for_device(dev, sqe->dma,
                        sizeof(struct nvme_command), DMA_TO_DEVICE);
  
 -      if (rq->cmd_type == REQ_TYPE_FS && req_op(rq) == REQ_OP_FLUSH)
 +      if (req_op(rq) == REQ_OP_FLUSH)
                flush = true;
        ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
                        req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
@@@ -1508,7 -1525,7 +1508,7 @@@ static void nvme_rdma_complete_rq(struc
                        return;
                }
  
 -              if (rq->cmd_type == REQ_TYPE_DRV_PRIV)
 +              if (blk_rq_is_passthrough(rq))
                        error = rq->errors;
                else
                        error = nvme_error_status(rq->errors);
@@@ -1891,16 -1908,6 +1891,16 @@@ static struct nvme_ctrl *nvme_rdma_crea
                goto out_free_ctrl;
        }
  
 +      if (opts->mask & NVMF_OPT_HOST_TRADDR) {
 +              ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in,
 +                              opts->host_traddr);
 +              if (ret) {
 +                      pr_err("malformed src IP address passed: %s\n",
 +                             opts->host_traddr);
 +                      goto out_free_ctrl;
 +              }
 +      }
 +
        if (opts->mask & NVMF_OPT_TRSVCID) {
                u16 port;
  
@@@ -2012,8 -2019,7 +2012,8 @@@ out_free_ctrl
  static struct nvmf_transport_ops nvme_rdma_transport = {
        .name           = "rdma",
        .required_opts  = NVMF_OPT_TRADDR,
 -      .allowed_opts   = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY,
 +      .allowed_opts   = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
 +                        NVMF_OPT_HOST_TRADDR,
        .create_ctrl    = nvme_rdma_create_ctrl,
  };
  
@@@ -2060,7 -2066,8 +2060,7 @@@ static int __init nvme_rdma_init_module
                return ret;
        }
  
 -      nvmf_register_transport(&nvme_rdma_transport);
 -      return 0;
 +      return nvmf_register_transport(&nvme_rdma_transport);
  }
  
  static void __exit nvme_rdma_cleanup_module(void)
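Besides the dev.parent conversion, the nvme-rdma hunks add an optional
host-side transport address (NVMF_OPT_HOST_TRADDR) that is parsed into
ctrl->src_addr_in and used to pin the local RDMA address. A condensed sketch
of the resolve step as it reads after the change:

	struct sockaddr *src_addr = NULL;

	if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
		src_addr = &ctrl->src_addr;	/* user-supplied host_traddr */

	ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr,
				NVME_RDMA_CONNECT_TIMEOUT_MS);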
diff --combined include/linux/device.h
index a48a7ff70164beee58f086fa6dca79e88bb6344a,46a567261ccc08cfe16680647432d11b8e99663a..30c4570e928dfe871bc84382f14eb49b5cac018e
@@@ -88,8 -88,6 +88,8 @@@ extern void bus_remove_file(struct bus_
   *
   * @suspend:  Called when a device on this bus wants to go to sleep mode.
   * @resume:   Called to bring a device on this bus out of sleep mode.
 + * @num_vf:   Called to find out how many virtual functions a device on this
 + *            bus supports.
   * @pm:               Power management operations of this bus, callback the specific
   *            device driver's pm-ops.
   * @iommu_ops:  IOMMU specific operations for this bus, used to attach IOMMU
@@@ -129,8 -127,6 +129,8 @@@ struct bus_type 
        int (*suspend)(struct device *dev, pm_message_t state);
        int (*resume)(struct device *dev);
  
 +      int (*num_vf)(struct device *dev);
 +
        const struct dev_pm_ops *pm;
  
        const struct iommu_ops *iommu_ops;
@@@ -925,6 -921,7 +925,7 @@@ struct device 
  #ifdef CONFIG_NUMA
        int             numa_node;      /* NUMA node this device is close to */
  #endif
+       const struct dma_map_ops *dma_ops;
        u64             *dma_mask;      /* dma mask (if dma'able device) */
        u64             coherent_dma_mask;/* Like dma_mask, but for
                                             alloc_coherent mappings as
@@@ -1139,19 -1136,11 +1140,19 @@@ static inline bool device_supports_offl
  extern void lock_device_hotplug(void);
  extern void unlock_device_hotplug(void);
  extern int lock_device_hotplug_sysfs(void);
 +void assert_held_device_hotplug(void);
  extern int device_offline(struct device *dev);
  extern int device_online(struct device *dev);
  extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
  extern void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
  
 +static inline int dev_num_vf(struct device *dev)
 +{
 +      if (dev->bus && dev->bus->num_vf)
 +              return dev->bus->num_vf(dev);
 +      return 0;
 +}
 +
  /*
   * Root device objects for grouping under /sys/devices
   */
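The device.h hunk carries two independent additions: the per-device dma_ops
pointer that the rest of this series builds on, and a bus-level num_vf()
callback exposed through dev_num_vf(), which falls back to 0 when the bus
does not implement the hook. A one-line usage sketch (the netdev variable is
hypothetical):

	int num_vf = dev_num_vf(netdev->dev.parent);	/* 0 if the bus has no ->num_vf */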
diff --combined include/linux/dma-mapping.h
index c24721a33b4c5d7816d1da34d3440a2d0d67be2c,426c43d4fdbf49fedbc9a0f0ad85fe8d92c6c3d3..0977317c6835c2526428f61c12fcfab976650b99
   */
  #define DMA_ATTR_NO_WARN      (1UL << 8)
  
 +/*
 + * DMA_ATTR_PRIVILEGED: used to indicate that the buffer is fully
 + * accessible at an elevated privilege level (and ideally inaccessible or
 + * at least read-only at lesser-privileged levels).
 + */
 +#define DMA_ATTR_PRIVILEGED           (1UL << 9)
 +
  /*
   * A dma_addr_t can hold any valid DMA or bus address for the platform.
   * It can be given to a device to use as a DMA source or target.  A CPU cannot
@@@ -134,7 -127,8 +134,8 @@@ struct dma_map_ops 
        int is_phys;
  };
  
- extern struct dma_map_ops dma_noop_ops;
+ extern const struct dma_map_ops dma_noop_ops;
+ extern const struct dma_map_ops dma_virt_ops;
  
  #define DMA_BIT_MASK(n)       (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
  
@@@ -171,14 -165,26 +172,26 @@@ int dma_mmap_from_coherent(struct devic
  
  #ifdef CONFIG_HAS_DMA
  #include <asm/dma-mapping.h>
+ static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
+ {
+       if (dev && dev->dma_ops)
+               return dev->dma_ops;
+       return get_arch_dma_ops(dev ? dev->bus : NULL);
+ }
+ static inline void set_dma_ops(struct device *dev,
+                              const struct dma_map_ops *dma_ops)
+ {
+       dev->dma_ops = dma_ops;
+ }
  #else
  /*
   * Define the dma api to allow compilation but not linking of
   * dma dependent code.  Code that depends on the dma-mapping
   * API needs to set 'depends on HAS_DMA' in its Kconfig
   */
- extern struct dma_map_ops bad_dma_ops;
- static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+ extern const struct dma_map_ops bad_dma_ops;
+ static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
  {
        return &bad_dma_ops;
  }
@@@ -189,7 -195,7 +202,7 @@@ static inline dma_addr_t dma_map_single
                                              enum dma_data_direction dir,
                                              unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        dma_addr_t addr;
  
        kmemcheck_mark_initialized(ptr, size);
@@@ -208,7 -214,7 +221,7 @@@ static inline void dma_unmap_single_att
                                          enum dma_data_direction dir,
                                          unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
  
        BUG_ON(!valid_dma_direction(dir));
        if (ops->unmap_page)
@@@ -224,7 -230,7 +237,7 @@@ static inline int dma_map_sg_attrs(stru
                                   int nents, enum dma_data_direction dir,
                                   unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        int i, ents;
        struct scatterlist *s;
  
@@@ -242,7 -248,7 +255,7 @@@ static inline void dma_unmap_sg_attrs(s
                                      int nents, enum dma_data_direction dir,
                                      unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
  
        BUG_ON(!valid_dma_direction(dir));
        debug_dma_unmap_sg(dev, sg, nents, dir);
@@@ -256,7 -262,7 +269,7 @@@ static inline dma_addr_t dma_map_page_a
                                            enum dma_data_direction dir,
                                            unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        dma_addr_t addr;
  
        kmemcheck_mark_initialized(page_address(page) + offset, size);
@@@ -272,7 -278,7 +285,7 @@@ static inline void dma_unmap_page_attrs
                                        enum dma_data_direction dir,
                                        unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
  
        BUG_ON(!valid_dma_direction(dir));
        if (ops->unmap_page)
@@@ -286,7 -292,7 +299,7 @@@ static inline dma_addr_t dma_map_resour
                                          enum dma_data_direction dir,
                                          unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        dma_addr_t addr;
  
        BUG_ON(!valid_dma_direction(dir));
@@@ -307,7 -313,7 +320,7 @@@ static inline void dma_unmap_resource(s
                                      size_t size, enum dma_data_direction dir,
                                      unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
  
        BUG_ON(!valid_dma_direction(dir));
        if (ops->unmap_resource)
@@@ -319,7 -325,7 +332,7 @@@ static inline void dma_sync_single_for_
                                           size_t size,
                                           enum dma_data_direction dir)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
  
        BUG_ON(!valid_dma_direction(dir));
        if (ops->sync_single_for_cpu)
@@@ -331,7 -337,7 +344,7 @@@ static inline void dma_sync_single_for_
                                              dma_addr_t addr, size_t size,
                                              enum dma_data_direction dir)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
  
        BUG_ON(!valid_dma_direction(dir));
        if (ops->sync_single_for_device)
@@@ -371,7 -377,7 +384,7 @@@ static inline voi
  dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
                    int nelems, enum dma_data_direction dir)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
  
        BUG_ON(!valid_dma_direction(dir));
        if (ops->sync_sg_for_cpu)
@@@ -383,7 -389,7 +396,7 @@@ static inline voi
  dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
                       int nelems, enum dma_data_direction dir)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
  
        BUG_ON(!valid_dma_direction(dir));
        if (ops->sync_sg_for_device)
@@@ -428,7 -434,7 +441,7 @@@ static inline in
  dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr,
               dma_addr_t dma_addr, size_t size, unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        BUG_ON(!ops);
        if (ops->mmap)
                return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
@@@ -446,7 -452,7 +459,7 @@@ dma_get_sgtable_attrs(struct device *de
                      dma_addr_t dma_addr, size_t size,
                      unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        BUG_ON(!ops);
        if (ops->get_sgtable)
                return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
@@@ -464,7 -470,7 +477,7 @@@ static inline void *dma_alloc_attrs(str
                                       dma_addr_t *dma_handle, gfp_t flag,
                                       unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
        void *cpu_addr;
  
        BUG_ON(!ops);
@@@ -486,7 -492,7 +499,7 @@@ static inline void dma_free_attrs(struc
                                     void *cpu_addr, dma_addr_t dma_handle,
                                     unsigned long attrs)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
  
        BUG_ON(!ops);
        WARN_ON(irqs_disabled());
@@@ -544,7 -550,7 +557,7 @@@ static inline int dma_mapping_error(str
  #ifndef HAVE_ARCH_DMA_SUPPORTED
  static inline int dma_supported(struct device *dev, u64 mask)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
  
        if (!ops)
                return 0;
  #ifndef HAVE_ARCH_DMA_SET_MASK
  static inline int dma_set_mask(struct device *dev, u64 mask)
  {
-       struct dma_map_ops *ops = get_dma_ops(dev);
+       const struct dma_map_ops *ops = get_dma_ops(dev);
  
        if (ops->set_dma_mask)
                return ops->set_dma_mask(dev, mask);
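The behavioural core of the dma-mapping.h change is the lookup order in
get_dma_ops(): a non-NULL dev->dma_ops now wins over get_arch_dma_ops(), and
all wrappers treat the ops table as const. Combined with the dma_virt_ops
declaration added above, this is what lets software RDMA providers drop their
private ib_dma_mapping_ops; a sketch of the opt-in side (the function name is
illustrative, not a quote from the rxe/rdmavt patches):

	static void example_setup_soft_ibdev(struct ib_device *ibdev)
	{
		set_dma_ops(&ibdev->dev, &dma_virt_ops);
		/* ib_dma_map_single() and friends now resolve to dma_virt_ops
		 * via get_dma_ops(&ibdev->dev). */
	}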
diff --combined include/rdma/ib_verbs.h
index 89f5bd4e1d5201c847ff77823b6b4159a741ef2f,f199c42b9a86f693bbe80f6c96a6055182548e26..d84849c5dc05397421d51776d68fdb09042388f8
@@@ -207,7 -207,6 +207,7 @@@ enum ib_device_cap_flags 
        IB_DEVICE_MEM_WINDOW_TYPE_2A            = (1 << 23),
        IB_DEVICE_MEM_WINDOW_TYPE_2B            = (1 << 24),
        IB_DEVICE_RC_IP_CSUM                    = (1 << 25),
 +      /* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */
        IB_DEVICE_RAW_IP_CSUM                   = (1 << 26),
        /*
         * Devices should set IB_DEVICE_CROSS_CHANNEL if they
        IB_DEVICE_ON_DEMAND_PAGING              = (1ULL << 31),
        IB_DEVICE_SG_GAPS_REG                   = (1ULL << 32),
        IB_DEVICE_VIRTUAL_FUNCTION              = (1ULL << 33),
 +      /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
        IB_DEVICE_RAW_SCATTER_FCS               = (1ULL << 34),
  };
  
@@@ -243,8 -241,7 +243,8 @@@ enum ib_atomic_cap 
  };
  
  enum ib_odp_general_cap_bits {
 -      IB_ODP_SUPPORT = 1 << 0,
 +      IB_ODP_SUPPORT          = 1 << 0,
 +      IB_ODP_SUPPORT_IMPLICIT = 1 << 1,
  };
  
  enum ib_odp_transport_cap_bits {
@@@ -333,7 -330,6 +333,7 @@@ struct ib_device_attr 
        uint64_t                hca_core_clock; /* in KHZ */
        struct ib_rss_caps      rss_caps;
        u32                     max_wq_type_rq;
 +      u32                     raw_packet_caps; /* Use ib_raw_packet_caps enum */
  };
  
  enum ib_mtu {
@@@ -356,20 -352,6 +356,20 @@@ static inline int ib_mtu_enum_to_int(en
        }
  }
  
 +static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
 +{
 +      if (mtu >= 4096)
 +              return IB_MTU_4096;
 +      else if (mtu >= 2048)
 +              return IB_MTU_2048;
 +      else if (mtu >= 1024)
 +              return IB_MTU_1024;
 +      else if (mtu >= 512)
 +              return IB_MTU_512;
 +      else
 +              return IB_MTU_256;
 +}
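	/* Rounding behaviour of the helper above, for illustration:
	 * ib_mtu_int_to_enum(1500) == IB_MTU_1024 (largest IB MTU not
	 * exceeding the value), ib_mtu_int_to_enum(4096) == IB_MTU_4096,
	 * and anything below 512 yields IB_MTU_256. */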
 +
  enum ib_port_state {
        IB_PORT_NOP             = 0,
        IB_PORT_DOWN            = 1,
@@@ -503,8 -485,6 +503,8 @@@ static inline struct rdma_hw_stats *rdm
  #define RDMA_CORE_CAP_PROT_ROCE         0x00200000
  #define RDMA_CORE_CAP_PROT_IWARP        0x00400000
  #define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
 +#define RDMA_CORE_CAP_PROT_RAW_PACKET   0x01000000
 +#define RDMA_CORE_CAP_PROT_USNIC        0x02000000
  
  #define RDMA_CORE_PORT_IBA_IB          (RDMA_CORE_CAP_PROT_IB  \
                                        | RDMA_CORE_CAP_IB_MAD \
  #define RDMA_CORE_PORT_INTEL_OPA       (RDMA_CORE_PORT_IBA_IB  \
                                        | RDMA_CORE_CAP_OPA_MAD)
  
 +#define RDMA_CORE_PORT_RAW_PACKET     (RDMA_CORE_CAP_PROT_RAW_PACKET)
 +
 +#define RDMA_CORE_PORT_USNIC          (RDMA_CORE_CAP_PROT_USNIC)
 +
  struct ib_port_attr {
        u64                     subnet_prefix;
        enum ib_port_state      state;
@@@ -1029,7 -1005,6 +1029,7 @@@ enum ib_qp_create_flags 
        IB_QP_CREATE_SIGNATURE_EN               = 1 << 6,
        IB_QP_CREATE_USE_GFP_NOIO               = 1 << 7,
        IB_QP_CREATE_SCATTER_FCS                = 1 << 8,
 +      IB_QP_CREATE_CVLAN_STRIPPING            = 1 << 9,
        /* reserve bits 26-31 for low level drivers' internal use */
        IB_QP_CREATE_RESERVED_START             = 1 << 26,
        IB_QP_CREATE_RESERVED_END               = 1 << 31,
@@@ -1481,18 -1456,6 +1481,18 @@@ struct ib_srq 
        } ext;
  };
  
 +enum ib_raw_packet_caps {
 +      /* Strip cvlan from incoming packet and report it in the matching work
 +       * completion is supported.
 +       */
 +      IB_RAW_PACKET_CAP_CVLAN_STRIPPING       = (1 << 0),
 +      /* Scatter FCS field of an incoming packet to host memory is supported.
 +       */
 +      IB_RAW_PACKET_CAP_SCATTER_FCS           = (1 << 1),
 +      /* Checksum offloads are supported (for both send and receive). */
 +      IB_RAW_PACKET_CAP_IP_CSUM               = (1 << 2),
 +};
 +
  enum ib_wq_type {
        IB_WQT_RQ
  };
@@@ -1516,11 -1479,6 +1516,11 @@@ struct ib_wq 
        atomic_t                usecnt;
  };
  
 +enum ib_wq_flags {
 +      IB_WQ_FLAGS_CVLAN_STRIPPING     = 1 << 0,
 +      IB_WQ_FLAGS_SCATTER_FCS         = 1 << 1,
 +};
 +
  struct ib_wq_init_attr {
        void                   *wq_context;
        enum ib_wq_type wq_type;
        u32             max_sge;
        struct  ib_cq          *cq;
        void                (*event_handler)(struct ib_event *, void *);
 +      u32             create_flags; /* Use enum ib_wq_flags */
  };
  
  enum ib_wq_attr_mask {
 -      IB_WQ_STATE     = 1 << 0,
 -      IB_WQ_CUR_STATE = 1 << 1,
 +      IB_WQ_STATE             = 1 << 0,
 +      IB_WQ_CUR_STATE         = 1 << 1,
 +      IB_WQ_FLAGS             = 1 << 2,
  };
  
  struct ib_wq_attr {
        enum    ib_wq_state     wq_state;
        enum    ib_wq_state     curr_wq_state;
 +      u32                     flags; /* Use enum ib_wq_flags */
 +      u32                     flags_mask; /* Use enum ib_wq_flags */
  };
  
  struct ib_rwq_ind_table {
@@@ -1650,8 -1604,6 +1650,8 @@@ enum ib_flow_spec_type 
        IB_FLOW_SPEC_UDP                = 0x41,
        IB_FLOW_SPEC_VXLAN_TUNNEL       = 0x50,
        IB_FLOW_SPEC_INNER              = 0x100,
 +      /* Actions */
 +      IB_FLOW_SPEC_ACTION_TAG         = 0x1000,
  };
  #define IB_FLOW_SPEC_LAYER_MASK       0xF0
  #define IB_FLOW_SPEC_SUPPORT_LAYERS 8
@@@ -1774,12 -1726,6 +1774,12 @@@ struct ib_flow_spec_tunnel 
        struct ib_flow_tunnel_filter  mask;
  };
  
 +struct ib_flow_spec_action_tag {
 +      enum ib_flow_spec_type        type;
 +      u16                           size;
 +      u32                           tag_id;
 +};
 +
  union ib_flow_spec {
        struct {
                u32                     type;
        struct ib_flow_spec_tcp_udp     tcp_udp;
        struct ib_flow_spec_ipv6        ipv6;
        struct ib_flow_spec_tunnel      tunnel;
 +      struct ib_flow_spec_action_tag  flow_tag;
  };
  
  struct ib_flow_attr {
@@@ -1830,66 -1775,15 +1830,19 @@@ enum ib_mad_result 
  
  #define IB_DEVICE_NAME_MAX 64
  
 +struct ib_port_cache {
 +      struct ib_pkey_cache  *pkey;
 +      struct ib_gid_table   *gid;
 +      u8                     lmc;
 +      enum ib_port_state     port_state;
 +};
 +
  struct ib_cache {
        rwlock_t                lock;
        struct ib_event_handler event_handler;
 -      struct ib_pkey_cache  **pkey_cache;
 -      struct ib_gid_table   **gid_cache;
 -      u8                     *lmc_cache;
 -      enum ib_port_state     *port_state_cache;
 +      struct ib_port_cache   *ports;
  };
  
- struct ib_dma_mapping_ops {
-       int             (*mapping_error)(struct ib_device *dev,
-                                        u64 dma_addr);
-       u64             (*map_single)(struct ib_device *dev,
-                                     void *ptr, size_t size,
-                                     enum dma_data_direction direction);
-       void            (*unmap_single)(struct ib_device *dev,
-                                       u64 addr, size_t size,
-                                       enum dma_data_direction direction);
-       u64             (*map_page)(struct ib_device *dev,
-                                   struct page *page, unsigned long offset,
-                                   size_t size,
-                                   enum dma_data_direction direction);
-       void            (*unmap_page)(struct ib_device *dev,
-                                     u64 addr, size_t size,
-                                     enum dma_data_direction direction);
-       int             (*map_sg)(struct ib_device *dev,
-                                 struct scatterlist *sg, int nents,
-                                 enum dma_data_direction direction);
-       void            (*unmap_sg)(struct ib_device *dev,
-                                   struct scatterlist *sg, int nents,
-                                   enum dma_data_direction direction);
-       int             (*map_sg_attrs)(struct ib_device *dev,
-                                       struct scatterlist *sg, int nents,
-                                       enum dma_data_direction direction,
-                                       unsigned long attrs);
-       void            (*unmap_sg_attrs)(struct ib_device *dev,
-                                         struct scatterlist *sg, int nents,
-                                         enum dma_data_direction direction,
-                                         unsigned long attrs);
-       void            (*sync_single_for_cpu)(struct ib_device *dev,
-                                              u64 dma_handle,
-                                              size_t size,
-                                              enum dma_data_direction dir);
-       void            (*sync_single_for_device)(struct ib_device *dev,
-                                                 u64 dma_handle,
-                                                 size_t size,
-                                                 enum dma_data_direction dir);
-       void            *(*alloc_coherent)(struct ib_device *dev,
-                                          size_t size,
-                                          u64 *dma_handle,
-                                          gfp_t flag);
-       void            (*free_coherent)(struct ib_device *dev,
-                                        size_t size, void *cpu_addr,
-                                        u64 dma_handle);
- };
  struct iw_cm_verbs;
  
  struct ib_port_immutable {
  };
  
  struct ib_device {
-       struct device                *dma_device;
        char                          name[IB_DEVICE_NAME_MAX];
  
        struct list_head              event_handler_list;
                                                           struct ib_rwq_ind_table_init_attr *init_attr,
                                                           struct ib_udata *udata);
        int                        (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
-       struct ib_dma_mapping_ops   *dma_ops;
  
        struct module               *owner;
        struct device                dev;
@@@ -2335,13 -2226,6 +2285,13 @@@ static inline u8 rdma_end_port(const st
        return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt;
  }
  
 +static inline int rdma_is_port_valid(const struct ib_device *device,
 +                                   unsigned int port)
 +{
 +      return (port >= rdma_start_port(device) &&
 +              port <= rdma_end_port(device));
 +}
 +
  static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
  {
        return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
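
  The new rdma_is_port_valid() helper above is just a range check against rdma_start_port()/rdma_end_port(). A minimal usage sketch (the variable name and error value are illustrative, not taken from a specific driver):

        /* illustrative: validate a caller-supplied port number with the new
         * helper instead of open-coding the start/end comparison */
        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;
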
@@@ -2374,16 -2258,6 +2324,16 @@@ static inline bool rdma_ib_or_roce(cons
                rdma_protocol_roce(device, port_num);
  }
  
 +static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
 +{
 +      return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET;
 +}
 +
 +static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
 +{
 +      return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_USNIC;
 +}
 +
  /**
   * rdma_cap_ib_mad - Check if the port of a device supports Infiniband
   * Management Datagrams.
@@@ -3043,9 -2917,7 +2993,7 @@@ static inline int ib_req_ncomp_notif(st
   */
  static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
  {
-       if (dev->dma_ops)
-               return dev->dma_ops->mapping_error(dev, dma_addr);
-       return dma_mapping_error(dev->dma_device, dma_addr);
+       return dma_mapping_error(&dev->dev, dma_addr);
  }
  
  /**
@@@ -3059,9 -2931,7 +3007,7 @@@ static inline u64 ib_dma_map_single(str
                                    void *cpu_addr, size_t size,
                                    enum dma_data_direction direction)
  {
-       if (dev->dma_ops)
-               return dev->dma_ops->map_single(dev, cpu_addr, size, direction);
-       return dma_map_single(dev->dma_device, cpu_addr, size, direction);
+       return dma_map_single(&dev->dev, cpu_addr, size, direction);
  }
  
  /**
@@@ -3075,28 -2945,7 +3021,7 @@@ static inline void ib_dma_unmap_single(
                                       u64 addr, size_t size,
                                       enum dma_data_direction direction)
  {
-       if (dev->dma_ops)
-               dev->dma_ops->unmap_single(dev, addr, size, direction);
-       else
-               dma_unmap_single(dev->dma_device, addr, size, direction);
- }
- static inline u64 ib_dma_map_single_attrs(struct ib_device *dev,
-                                         void *cpu_addr, size_t size,
-                                         enum dma_data_direction direction,
-                                         unsigned long dma_attrs)
- {
-       return dma_map_single_attrs(dev->dma_device, cpu_addr, size,
-                                   direction, dma_attrs);
- }
- static inline void ib_dma_unmap_single_attrs(struct ib_device *dev,
-                                            u64 addr, size_t size,
-                                            enum dma_data_direction direction,
-                                            unsigned long dma_attrs)
- {
-       return dma_unmap_single_attrs(dev->dma_device, addr, size,
-                                     direction, dma_attrs);
+       dma_unmap_single(&dev->dev, addr, size, direction);
  }
  
  /**
@@@ -3113,9 -2962,7 +3038,7 @@@ static inline u64 ib_dma_map_page(struc
                                  size_t size,
                                         enum dma_data_direction direction)
  {
-       if (dev->dma_ops)
-               return dev->dma_ops->map_page(dev, page, offset, size, direction);
-       return dma_map_page(dev->dma_device, page, offset, size, direction);
+       return dma_map_page(&dev->dev, page, offset, size, direction);
  }
  
  /**
@@@ -3129,10 -2976,7 +3052,7 @@@ static inline void ib_dma_unmap_page(st
                                     u64 addr, size_t size,
                                     enum dma_data_direction direction)
  {
-       if (dev->dma_ops)
-               dev->dma_ops->unmap_page(dev, addr, size, direction);
-       else
-               dma_unmap_page(dev->dma_device, addr, size, direction);
+       dma_unmap_page(&dev->dev, addr, size, direction);
  }
  
  /**
@@@ -3146,9 -2990,7 +3066,7 @@@ static inline int ib_dma_map_sg(struct 
                                struct scatterlist *sg, int nents,
                                enum dma_data_direction direction)
  {
-       if (dev->dma_ops)
-               return dev->dma_ops->map_sg(dev, sg, nents, direction);
-       return dma_map_sg(dev->dma_device, sg, nents, direction);
+       return dma_map_sg(&dev->dev, sg, nents, direction);
  }
  
  /**
@@@ -3162,10 -3004,7 +3080,7 @@@ static inline void ib_dma_unmap_sg(stru
                                   struct scatterlist *sg, int nents,
                                   enum dma_data_direction direction)
  {
-       if (dev->dma_ops)
-               dev->dma_ops->unmap_sg(dev, sg, nents, direction);
-       else
-               dma_unmap_sg(dev->dma_device, sg, nents, direction);
+       dma_unmap_sg(&dev->dev, sg, nents, direction);
  }
  
  static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
                                      enum dma_data_direction direction,
                                      unsigned long dma_attrs)
  {
-       if (dev->dma_ops)
-               return dev->dma_ops->map_sg_attrs(dev, sg, nents, direction,
-                                                 dma_attrs);
-       else
-               return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
-                                       dma_attrs);
+       return dma_map_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs);
  }
  
  static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
                                         enum dma_data_direction direction,
                                         unsigned long dma_attrs)
  {
-       if (dev->dma_ops)
-               return dev->dma_ops->unmap_sg_attrs(dev, sg, nents, direction,
-                                                 dma_attrs);
-       else
-               dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
-                                  dma_attrs);
+       dma_unmap_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs);
  }
  /**
   * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
@@@ -3233,10 -3062,7 +3138,7 @@@ static inline void ib_dma_sync_single_f
                                              size_t size,
                                              enum dma_data_direction dir)
  {
-       if (dev->dma_ops)
-               dev->dma_ops->sync_single_for_cpu(dev, addr, size, dir);
-       else
-               dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+       dma_sync_single_for_cpu(&dev->dev, addr, size, dir);
  }
  
  /**
@@@ -3251,10 -3077,7 +3153,7 @@@ static inline void ib_dma_sync_single_f
                                                 size_t size,
                                                 enum dma_data_direction dir)
  {
-       if (dev->dma_ops)
-               dev->dma_ops->sync_single_for_device(dev, addr, size, dir);
-       else
-               dma_sync_single_for_device(dev->dma_device, addr, size, dir);
+       dma_sync_single_for_device(&dev->dev, addr, size, dir);
  }
  
  /**
   */
  static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
                                           size_t size,
-                                          u64 *dma_handle,
+                                          dma_addr_t *dma_handle,
                                           gfp_t flag)
  {
-       if (dev->dma_ops)
-               return dev->dma_ops->alloc_coherent(dev, size, dma_handle, flag);
-       else {
-               dma_addr_t handle;
-               void *ret;
-               ret = dma_alloc_coherent(dev->dma_device, size, &handle, flag);
-               *dma_handle = handle;
-               return ret;
-       }
+       return dma_alloc_coherent(&dev->dev, size, dma_handle, flag);
  }
  
  /**
   */
  static inline void ib_dma_free_coherent(struct ib_device *dev,
                                        size_t size, void *cpu_addr,
-                                       u64 dma_handle)
+                                       dma_addr_t dma_handle)
  {
-       if (dev->dma_ops)
-               dev->dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
-       else
-               dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
+       dma_free_coherent(&dev->dev, size, cpu_addr, dma_handle);
  }
  
  /**
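
  The ib_verbs.h hunks above drop the ib_dma_mapping_ops indirection and ib_device.dma_device: each ib_dma_*() wrapper now calls the corresponding core dma_*() routine on the embedded &dev->dev, and the coherent helpers take a real dma_addr_t instead of u64. Callers keep the same wrapper API; a minimal sketch of a ULP mapping path under the new code (the function, buffer and length names are illustrative only):

        /* sketch only: post_hdr(), buf and len are made-up names; the
         * ib_dma_*() wrappers are the real helpers above, which now forward
         * to dma_*(&ibdev->dev, ...) */
        static int post_hdr(struct ib_device *ibdev, void *buf, size_t len)
        {
                u64 addr = ib_dma_map_single(ibdev, buf, len, DMA_TO_DEVICE);

                if (ib_dma_mapping_error(ibdev, addr))
                        return -ENOMEM;

                /* ... build and post a send WR that references addr ... */

                ib_dma_unmap_single(ibdev, addr, len, DMA_TO_DEVICE);
                return 0;
        }
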
diff --combined lib/Kconfig
index fe7e8e175db8f7b4c3e72f62c6dffeb01d0ecaee,97af23b26f3383477e30349f20a47362b1c96e61..8f69579dfac3e78a57246b6b4447d29e68a2ba02
@@@ -103,7 -103,8 +103,7 @@@ config CRC3
          functions require M here.
  
  config CRC32_SELFTEST
 -      bool "CRC32 perform self test on init"
 -      default n
 +      tristate "CRC32 perform self test on init"
        depends on CRC32
        help
          This option enables the CRC32 library functions to perform a
@@@ -394,6 -395,16 +394,16 @@@ config HAS_DM
        depends on !NO_DMA
        default y
  
+ config DMA_NOOP_OPS
+       bool
+       depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
+       default n
+ 
+ config DMA_VIRT_OPS
+       bool
+       depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
+       default n
+ 
  config CHECK_SIGNATURE
        bool
  
@@@ -431,7 -442,8 +441,7 @@@ config GLO
          depends on this.
  
  config GLOB_SELFTEST
 -      bool "glob self-test on init"
 -      default n
 +      tristate "glob self-test on init"
        depends on GLOB
        help
          This option enables a simple self-test of the glob_match
@@@ -548,10 -560,4 +558,10 @@@ config STACKDEPO
  config SBITMAP
        bool
  
 +config PARMAN
 +      tristate
 +
 +config PRIME_NUMBERS
 +      tristate
 +
  endmenu
diff --combined lib/Makefile
index 445a39c21f465ba1a0bf2fa1764027b883b814ef,b97e9a8148549497f6246dea01e3c9590227698f..c9023efbd4ca2b111a3b76e5c85b0865a64beeb4
@@@ -22,17 -22,17 +22,18 @@@ lib-y := ctype.o string.o vsprintf.o cm
         sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
         flex_proportions.o ratelimit.o show_mem.o \
         is_single_threaded.o plist.o decompress.o kobject_uevent.o \
 -       earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
 +       earlycpio.o seq_buf.o siphash.o \
 +       nmi_backtrace.o nodemask.o win_minmax.o
  
  lib-$(CONFIG_MMU) += ioremap.o
  lib-$(CONFIG_SMP) += cpumask.o
- lib-$(CONFIG_HAS_DMA) += dma-noop.o
+ lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o
+ lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
  
  lib-y += kobject.o klist.o
  obj-y += lockref.o
  
 -obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 +obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
         bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
         gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
         bsearch.o find_bit.o llist.o memweight.o kfifo.o \
@@@ -45,19 -45,17 +46,19 @@@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexd
  obj-y += kstrtox.o
  obj-$(CONFIG_TEST_BPF) += test_bpf.o
  obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
 -obj-$(CONFIG_TEST_HASH) += test_hash.o
 +obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
  obj-$(CONFIG_TEST_KASAN) += test_kasan.o
  obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
  obj-$(CONFIG_TEST_LKM) += test_module.o
  obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
 +obj-$(CONFIG_TEST_SORT) += test_sort.o
  obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
  obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
  obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
  obj-$(CONFIG_TEST_PRINTF) += test_printf.o
  obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
  obj-$(CONFIG_TEST_UUID) += test_uuid.o
 +obj-$(CONFIG_TEST_PARMAN) += test_parman.o
  
  ifeq ($(CONFIG_DEBUG_KOBJECT),y)
  CFLAGS_kobject.o += -DDEBUG
@@@ -93,7 -91,6 +94,7 @@@ obj-$(CONFIG_CRC16)   += crc16.
  obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
  obj-$(CONFIG_CRC_ITU_T)       += crc-itu-t.o
  obj-$(CONFIG_CRC32)   += crc32.o
 +obj-$(CONFIG_CRC32_SELFTEST)  += crc32test.o
  obj-$(CONFIG_CRC7)    += crc7.o
  obj-$(CONFIG_LIBCRC32C)       += libcrc32c.o
  obj-$(CONFIG_CRC8)    += crc8.o
@@@ -163,7 -160,6 +164,7 @@@ obj-$(CONFIG_CORDIC) += cordic.
  obj-$(CONFIG_DQL) += dynamic_queue_limits.o
  
  obj-$(CONFIG_GLOB) += glob.o
 +obj-$(CONFIG_GLOB_SELFTEST) += globtest.o
  
  obj-$(CONFIG_MPILIB) += mpi/
  obj-$(CONFIG_SIGNATURE) += digsig.o
@@@ -201,8 -197,6 +202,8 @@@ obj-$(CONFIG_ASN1) += asn1_decoder.
  
  obj-$(CONFIG_FONT_SUPPORT) += fonts/
  
 +obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o
 +
  hostprogs-y   := gen_crc32table
  clean-files   := crc32table.h
  
@@@ -236,5 -230,3 +237,5 @@@ obj-$(CONFIG_UBSAN) += ubsan.
  UBSAN_SANITIZE_ubsan.o := n
  
  obj-$(CONFIG_SBITMAP) += sbitmap.o
 +
 +obj-$(CONFIG_PARMAN) += parman.o
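
  The DMA_NOOP_OPS/DMA_VIRT_OPS symbols and the new dma-virt.o object let software RDMA drivers (rxe, rdmavt) share one dma_virt_ops implementation instead of each duplicating a "kernel virtual address as DMA address" ops table; per the commit list, such a driver then points its device at dma_virt_ops rather than installing a private ib_dma_mapping_ops. The shared ops boil down to something like the following simplified sketch (see lib/dma-virt.c in this series for the actual implementation):

        /* simplified sketch of the dma_virt_ops idea: the "DMA" address
         * handed back for a page is just its kernel virtual address */
        static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page,
                                            unsigned long offset, size_t size,
                                            enum dma_data_direction dir,
                                            unsigned long attrs)
        {
                return (uintptr_t)(page_address(page) + offset);
        }
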
diff --combined net/rds/ib.h
index 540458928f3c8eab4529b4072826b882563c941e,4a520826da232a7db7309596e18b02fbf66d8672..ec550626e221cb80fa5aaec7a00116dc3bd55c5d
  
  #define RDS_IB_DEFAULT_RECV_WR                1024
  #define RDS_IB_DEFAULT_SEND_WR                256
 -#define RDS_IB_DEFAULT_FR_WR          512
 +#define RDS_IB_DEFAULT_FR_WR          256
 +#define RDS_IB_DEFAULT_FR_INV_WR      256
  
 -#define RDS_IB_DEFAULT_RETRY_COUNT    2
 +#define RDS_IB_DEFAULT_RETRY_COUNT    1
  
  #define RDS_IB_SUPPORTED_PROTOCOLS    0x00000003      /* minor versions supported */
  
@@@ -126,7 -125,6 +126,7 @@@ struct rds_ib_connection 
  
        /* To control the number of wrs from fastreg */
        atomic_t                i_fastreg_wrs;
 +      atomic_t                i_fastunreg_wrs;
  
        /* interrupt handling */
        struct tasklet_struct   i_send_tasklet;
        struct rds_ib_work_ring i_send_ring;
        struct rm_data_op       *i_data_op;
        struct rds_header       *i_send_hdrs;
-       u64                     i_send_hdrs_dma;
+       dma_addr_t              i_send_hdrs_dma;
        struct rds_ib_send_work *i_sends;
        atomic_t                i_signaled_sends;
  
        struct rds_ib_incoming  *i_ibinc;
        u32                     i_recv_data_rem;
        struct rds_header       *i_recv_hdrs;
-       u64                     i_recv_hdrs_dma;
+       dma_addr_t              i_recv_hdrs_dma;
        struct rds_ib_recv_work *i_recvs;
        u64                     i_ack_recv;     /* last ACK received */
        struct rds_ib_refill_cache i_cache_incs;
        struct rds_ib_refill_cache i_cache_frags;
 +      atomic_t                i_cache_allocs;
  
        /* sending acks */
        unsigned long           i_ack_flags;
        struct rds_header       *i_ack;
        struct ib_send_wr       i_ack_wr;
        struct ib_sge           i_ack_sge;
-       u64                     i_ack_dma;
+       dma_addr_t              i_ack_dma;
        unsigned long           i_ack_queued;
  
        /* Flow control related information
  
        /* Batched completions */
        unsigned int            i_unsignaled_wrs;
 +
 +      /* Endpoint role in connection */
 +      bool                    i_active_side;
 +      atomic_t                i_cq_quiesce;
 +
 +      /* Send/Recv vectors */
 +      int                     i_scq_vector;
 +      int                     i_rcq_vector;
  };
  
  /* This assumes that atomic_t is at least 32 bits */
@@@ -232,10 -221,9 +232,10 @@@ struct rds_ib_device 
        spinlock_t              spinlock;       /* protect the above */
        atomic_t                refcount;
        struct work_struct      free_work;
 +      int                     *vector_load;
  };
  
- #define ibdev_to_node(ibdev) dev_to_node(ibdev->dma_device)
+ #define ibdev_to_node(ibdev) dev_to_node((ibdev)->dev.parent)
  #define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)
  
  /* bits for i_ack_flags */
@@@ -261,8 -249,6 +261,8 @@@ struct rds_ib_statistics 
        uint64_t        s_ib_rx_refill_from_cq;
        uint64_t        s_ib_rx_refill_from_thread;
        uint64_t        s_ib_rx_alloc_limit;
 +      uint64_t        s_ib_rx_total_frags;
 +      uint64_t        s_ib_rx_total_incs;
        uint64_t        s_ib_rx_credit_updates;
        uint64_t        s_ib_ack_sent;
        uint64_t        s_ib_ack_send_failure;
        uint64_t        s_ib_rdma_mr_1m_reused;
        uint64_t        s_ib_atomic_cswp;
        uint64_t        s_ib_atomic_fadd;
 +      uint64_t        s_ib_recv_added_to_cache;
 +      uint64_t        s_ib_recv_removed_from_cache;
  };
  
  extern struct workqueue_struct *rds_ib_wq;
@@@ -417,8 -401,6 +417,8 @@@ int rds_ib_xmit_atomic(struct rds_conne
  /* ib_stats.c */
  DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
  #define rds_ib_stats_inc(member) rds_stats_inc_which(rds_ib_stats, member)
 +#define rds_ib_stats_add(member, count) \
 +              rds_stats_add_which(rds_ib_stats, member, count)
  unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
                                    unsigned int avail);
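
  In the net/rds/ib.h hunks, the i_send_hdrs_dma/i_recv_hdrs_dma/i_ack_dma handles change from u64 to dma_addr_t so they can be handed straight to the reworked coherent helpers, and ibdev_to_node() is redefined on dev.parent now that ib_device.dma_device is gone. An illustrative sketch (not the actual RDS setup code) of how such a handle gets filled in:

        /* illustrative only: allocate a header ring and keep its handle in a
         * dma_addr_t, matching the new field types above */
        static struct rds_header *alloc_hdr_ring(struct ib_device *ibdev, u32 nr,
                                                 dma_addr_t *dma)
        {
                return ib_dma_alloc_coherent(ibdev, nr * sizeof(struct rds_header),
                                             dma, GFP_KERNEL);
        }
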