]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/commitdiff
RDMA/mlx5: Use PA mapping for PI handover
authorMax Gurtovoy <maxg@mellanox.com>
Tue, 11 Jun 2019 15:52:56 +0000 (18:52 +0300)
committerJason Gunthorpe <jgg@mellanox.com>
Mon, 24 Jun 2019 14:49:28 +0000 (11:49 -0300)
If possibe, avoid doing a UMR operation to register data and protection
buffers (via MTT/KLM mkeys). Instead, use the local DMA key and map the
SG lists using PA access. This is safe, since the internal key for data
and protection never exposed to the remote server (only signature key
might be exposed). If PA mappings are not possible, perform mapping
using MTT/KLM descriptors.

The setup of the tested benchmark (using iSER ULP):
 - 2 servers with 24 cores (1 initiator and 1 target)
 - ConnectX-4/ConnectX-5 adapters
 - 24 target sessions with 1 LUN each
 - ramdisk backstore
 - PI active

Performance results running fio (24 jobs, 128 iodepth) using
write_generate=1 and read_verify=1 (w/w.o patch):

bs      IOPS(read)        IOPS(write)
----    ----------        ----------
512   1266.4K/1262.4K    1720.1K/1732.1K
4k    793139/570902      1129.6K/773982
32k   72660/72086        97229/96164

Using write_generate=0 and read_verify=0 (w/w.o patch):
bs      IOPS(read)        IOPS(write)
----    ----------        ----------
512   1590.2K/1600.1K    1828.2K/1830.3K
4k    1078.1K/937272     1142.1K/815304
32k   77012/77369        98125/97435

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c

index 40505952132142fc9c18017c3de0aa49ab2a7d29..bdb83fc85f94a6a03bb1539cfedba6bd96cf886c 100644 (file)
@@ -609,6 +609,7 @@ struct mlx5_ib_mr {
        struct mlx5_ib_mr      *pi_mr;
        struct mlx5_ib_mr      *klm_mr;
        struct mlx5_ib_mr      *mtt_mr;
+       u64                     data_iova;
        u64                     pi_iova;
 
        atomic_t                num_leaf_free;
index 8b40abd0070b8582b369d5a4251992afa96a3fff..f2ef89e48afa74f61f77ef3acee833a2fbdbf6d4 100644 (file)
@@ -2001,6 +2001,40 @@ done:
        return ret;
 }
 
+static int
+mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+                       int data_sg_nents, unsigned int *data_sg_offset,
+                       struct scatterlist *meta_sg, int meta_sg_nents,
+                       unsigned int *meta_sg_offset)
+{
+       struct mlx5_ib_mr *mr = to_mmr(ibmr);
+       unsigned int sg_offset = 0;
+       int n = 0;
+
+       mr->meta_length = 0;
+       if (data_sg_nents == 1) {
+               n++;
+               mr->ndescs = 1;
+               if (data_sg_offset)
+                       sg_offset = *data_sg_offset;
+               mr->data_length = sg_dma_len(data_sg) - sg_offset;
+               mr->data_iova = sg_dma_address(data_sg) + sg_offset;
+               if (meta_sg_nents == 1) {
+                       n++;
+                       mr->meta_ndescs = 1;
+                       if (meta_sg_offset)
+                               sg_offset = *meta_sg_offset;
+                       else
+                               sg_offset = 0;
+                       mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
+                       mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
+               }
+               ibmr->length = mr->data_length + mr->meta_length;
+       }
+
+       return n;
+}
+
 static int
 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
                   struct scatterlist *sgl,
@@ -2099,7 +2133,6 @@ mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
        struct mlx5_ib_mr *mr = to_mmr(ibmr);
        struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
        int n;
-       u64 iova;
 
        pi_mr->ndescs = 0;
        pi_mr->meta_ndescs = 0;
@@ -2115,13 +2148,14 @@ mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
        if (n != data_sg_nents)
                return n;
 
-       iova = pi_mr->ibmr.iova;
+       pi_mr->data_iova = pi_mr->ibmr.iova;
        pi_mr->data_length = pi_mr->ibmr.length;
        pi_mr->ibmr.length = pi_mr->data_length;
        ibmr->length = pi_mr->data_length;
 
        if (meta_sg_nents) {
                u64 page_mask = ~((u64)ibmr->page_size - 1);
+               u64 iova = pi_mr->data_iova;
 
                n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
                                    meta_sg_offset, mlx5_set_page_pi);
@@ -2181,6 +2215,7 @@ mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
                                      DMA_TO_DEVICE);
 
        /* This is zero-based memory region */
+       pi_mr->data_iova = 0;
        pi_mr->ibmr.iova = 0;
        pi_mr->pi_iova = pi_mr->data_length;
        ibmr->length = pi_mr->ibmr.length;
@@ -2194,11 +2229,27 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
                         unsigned int *meta_sg_offset)
 {
        struct mlx5_ib_mr *mr = to_mmr(ibmr);
-       struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
+       struct mlx5_ib_mr *pi_mr = NULL;
        int n;
 
        WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
 
+       mr->ndescs = 0;
+       mr->data_length = 0;
+       mr->data_iova = 0;
+       mr->meta_ndescs = 0;
+       mr->pi_iova = 0;
+       /*
+        * As a performance optimization, if possible, there is no need to
+        * perform UMR operation to register the data/metadata buffers.
+        * First try to map the sg lists to PA descriptors with local_dma_lkey.
+        * Fallback to UMR only in case of a failure.
+        */
+       n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+                                   data_sg_offset, meta_sg, meta_sg_nents,
+                                   meta_sg_offset);
+       if (n == data_sg_nents + meta_sg_nents)
+               goto out;
        /*
         * As a performance optimization, if possible, there is no need to map
         * the sg lists to KLM descriptors. First try to map the sg lists to MTT
@@ -2207,6 +2258,7 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
         * (especially in high load).
         * Use KLM (indirect access) only if it's mandatory.
         */
+       pi_mr = mr->mtt_mr;
        n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
                                     data_sg_offset, meta_sg, meta_sg_nents,
                                     meta_sg_offset);
@@ -2224,7 +2276,10 @@ out:
        /* This is zero-based memory region */
        ibmr->iova = 0;
        mr->pi_mr = pi_mr;
-       ibmr->sig_attrs->meta_length = pi_mr->meta_length;
+       if (pi_mr)
+               ibmr->sig_attrs->meta_length = pi_mr->meta_length;
+       else
+               ibmr->sig_attrs->meta_length = mr->meta_length;
 
        return 0;
 }
index f0962be5b11c8b1686f03cc0dcddf010aeb40765..4fbf60fed3742e4712009e2b94c017a7fb6179cd 100644 (file)
@@ -4562,7 +4562,7 @@ static int set_sig_data_segment(const struct ib_send_wr *send_wr,
 
        data_len = pi_mr->data_length;
        data_key = pi_mr->ibmr.lkey;
-       data_va = pi_mr->ibmr.iova;
+       data_va = pi_mr->data_iova;
        if (pi_mr->meta_ndescs) {
                prot_len = pi_mr->meta_length;
                prot_key = pi_mr->ibmr.lkey;
@@ -4912,6 +4912,7 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
        struct mlx5_ib_qp *qp;
        struct mlx5_ib_mr *mr;
        struct mlx5_ib_mr *pi_mr;
+       struct mlx5_ib_mr pa_pi_mr;
        struct ib_sig_attrs *sig_attrs;
        struct mlx5_wqe_xrc_seg *xrc;
        struct mlx5_bf *bf;
@@ -5026,35 +5027,62 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                                break;
 
                        case IB_WR_REG_MR_INTEGRITY:
-                               memset(&reg_pi_wr, 0, sizeof(struct ib_reg_wr));
+                               qp->sq.wr_data[idx] = IB_WR_REG_MR_INTEGRITY;
 
                                mr = to_mmr(reg_wr(wr)->mr);
                                pi_mr = mr->pi_mr;
 
-                               reg_pi_wr.mr = &pi_mr->ibmr;
-                               reg_pi_wr.access = reg_wr(wr)->access;
-                               reg_pi_wr.key = pi_mr->ibmr.rkey;
-
-                               qp->sq.wr_data[idx] = IB_WR_REG_MR_INTEGRITY;
-                               ctrl->imm = cpu_to_be32(reg_pi_wr.key);
-                               /* UMR for data + protection registration */
-                               err = set_reg_wr(qp, &reg_pi_wr, &seg, &size,
-                                                &cur_edge, false);
-                               if (err) {
-                                       *bad_wr = wr;
-                                       goto out;
-                               }
-                               finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
-                                          wr->wr_id, nreq, fence,
-                                          MLX5_OPCODE_UMR);
-
-                               err = begin_wqe(qp, &seg, &ctrl, wr, &idx,
-                                               &size, &cur_edge, nreq);
-                               if (err) {
-                                       mlx5_ib_warn(dev, "\n");
-                                       err = -ENOMEM;
-                                       *bad_wr = wr;
-                                       goto out;
+                               if (pi_mr) {
+                                       memset(&reg_pi_wr, 0,
+                                              sizeof(struct ib_reg_wr));
+
+                                       reg_pi_wr.mr = &pi_mr->ibmr;
+                                       reg_pi_wr.access = reg_wr(wr)->access;
+                                       reg_pi_wr.key = pi_mr->ibmr.rkey;
+
+                                       ctrl->imm = cpu_to_be32(reg_pi_wr.key);
+                                       /* UMR for data + prot registration */
+                                       err = set_reg_wr(qp, &reg_pi_wr, &seg,
+                                                        &size, &cur_edge,
+                                                        false);
+                                       if (err) {
+                                               *bad_wr = wr;
+                                               goto out;
+                                       }
+                                       finish_wqe(qp, ctrl, seg, size,
+                                                  cur_edge, idx, wr->wr_id,
+                                                  nreq, fence,
+                                                  MLX5_OPCODE_UMR);
+
+                                       err = begin_wqe(qp, &seg, &ctrl, wr,
+                                                       &idx, &size, &cur_edge,
+                                                       nreq);
+                                       if (err) {
+                                               mlx5_ib_warn(dev, "\n");
+                                               err = -ENOMEM;
+                                               *bad_wr = wr;
+                                               goto out;
+                                       }
+                               } else {
+                                       memset(&pa_pi_mr, 0,
+                                              sizeof(struct mlx5_ib_mr));
+                                       /* No UMR, use local_dma_lkey */
+                                       pa_pi_mr.ibmr.lkey =
+                                               mr->ibmr.pd->local_dma_lkey;
+
+                                       pa_pi_mr.ndescs = mr->ndescs;
+                                       pa_pi_mr.data_length = mr->data_length;
+                                       pa_pi_mr.data_iova = mr->data_iova;
+                                       if (mr->meta_ndescs) {
+                                               pa_pi_mr.meta_ndescs =
+                                                       mr->meta_ndescs;
+                                               pa_pi_mr.meta_length =
+                                                       mr->meta_length;
+                                               pa_pi_mr.pi_iova = mr->pi_iova;
+                                       }
+
+                                       pa_pi_mr.ibmr.length = mr->ibmr.length;
+                                       mr->pi_mr = &pa_pi_mr;
                                }
                                ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
                                /* UMR for sig MR */