Message ID | 1549560811-8655-10-git-send-email-maxg@mellanox.com (mailing list archive) |
---|---|
State | Superseded |
Series | Introduce new API for T10-PI offload |
On 2/7/19 9:33 AM, Max Gurtovoy wrote:
> mlx5_ib_map_mr_sg_pi() will map the PI and data DMA-mapped SG lists to the
> mlx5 memory region prior to the registration operation. In the new
> API, the mlx5 driver will allocate an internal memory region for the
> UMR operation to register both PI and data SG lists. The internal MR
> will use KLM mode in order to map 2 (possibly non-contiguous/non-aligned)
> SG lists using 1 memory key. In the new API, each ULP will use 1 memory
> region for the signature operation (instead of 3 in the old API). This
> memory region will have a key that will be exposed to the remote server to
> perform RDMA operations. The internal memory key that maps the SG lists
> will stay private.
>
> Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
> Signed-off-by: Israel Rukshin <israelr@mellanox.com>
> ---
>  drivers/infiniband/hw/mlx5/main.c    |   2 +
>  drivers/infiniband/hw/mlx5/mlx5_ib.h |   9 ++
>  drivers/infiniband/hw/mlx5/mr.c      | 183 ++++++++++++++++++++++++++++++++---
>  3 files changed, 179 insertions(+), 15 deletions(-)
>
> [quoted diff snipped; the full patch appears below]

Nice and clean! love it... btw, have you considered using the dma lkey
for single entry data and pi? Would probably save the overhead of
posting a second UMR for 4K workloads.
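A rough sketch of that suggestion (hypothetical, not part of this patch or the series): when both the data and the PI lists collapse to a single DMA-mapped segment, the signature mkey's own two KLMs could reference the PD's local DMA lkey directly, so the UMR that registers the internal pi_mr could be skipped:

	/* Hypothetical fast path, sketched from the comment above:
	 * for single-segment data and PI, build the KLMs straight
	 * against local_dma_lkey instead of registering pi_mr.
	 */
	struct mlx5_klm klm[2];	/* would live in the signature WQE */
	u32 lkey = mr->ibmr.pd->local_dma_lkey;

	if (data_ib_sg->dma_nents == 1 && meta_ib_sg->dma_nents == 1) {
		klm[0].va     = cpu_to_be64(sg_dma_address(data_ib_sg->sg));
		klm[0].bcount = cpu_to_be32(sg_dma_len(data_ib_sg->sg));
		klm[0].key    = cpu_to_be32(lkey);
		klm[1].va     = cpu_to_be64(sg_dma_address(meta_ib_sg->sg));
		klm[1].bcount = cpu_to_be32(sg_dma_len(meta_ib_sg->sg));
		klm[1].key    = cpu_to_be32(lkey);
		/* program klm[] into the signature mkey's UMR and skip
		 * the second UMR entirely */
	}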
On 2/7/2019 8:21 PM, Sagi Grimberg wrote:
> btw, have you considered using the dma lkey for single entry data and
> pi? Would probably save the overhead of posting a second UMR for
> 4K workloads.

Good idea, we'll address perf improvements during the NVMEoF/RDMA host
metadata submission.
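For context, a minimal sketch of how a ULP might drive the new verbs implemented in the patch below. It assumes the ib_alloc_mr_integrity()/ib_map_mr_sg_pi() core wrappers introduced earlier in this series and this version's struct ib_scatterlist; ulp_reg_pi_mr() is a made-up helper name:

	/* Sketch only: one integrity MR per I/O, per the commit message.
	 * The actual registration (UMR) happens later, when the ULP
	 * posts its registration work request for this MR.
	 */
	static int ulp_reg_pi_mr(struct ib_pd *pd,
				 struct ib_scatterlist *data_sg,
				 struct ib_scatterlist *meta_sg,
				 struct ib_mr **out_mr)
	{
		struct ib_mr *mr;
		int n;

		/* One MR covers both data and PI; only mr->rkey is handed
		 * to the remote peer, the internal KLM mkey stays private.
		 */
		mr = ib_alloc_mr_integrity(pd, data_sg->dma_nents,
					   meta_sg->dma_nents);
		if (IS_ERR(mr))
			return PTR_ERR(mr);

		n = ib_map_mr_sg_pi(mr, data_sg, meta_sg);
		if (n < data_sg->dma_nents + meta_sg->dma_nents) {
			ib_dereg_mr(mr);	/* ran out of descriptors */
			return -EINVAL;
		}

		*out_mr = mr;
		return 0;
	}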
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 94fe253d4956..c4b2d9db5d07 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5837,6 +5837,7 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
 static const struct ib_device_ops mlx5_ib_dev_ops = {
 	.add_gid = mlx5_ib_add_gid,
 	.alloc_mr = mlx5_ib_alloc_mr,
+	.alloc_mr_integrity = mlx5_ib_alloc_mr_integrity,
 	.alloc_pd = mlx5_ib_alloc_pd,
 	.alloc_ucontext = mlx5_ib_alloc_ucontext,
 	.attach_mcast = mlx5_ib_mcg_attach,
@@ -5866,6 +5867,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
 	.get_dma_mr = mlx5_ib_get_dma_mr,
 	.get_link_layer = mlx5_ib_port_link_layer,
 	.map_mr_sg = mlx5_ib_map_mr_sg,
+	.map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
 	.mmap = mlx5_ib_mmap,
 	.modify_cq = mlx5_ib_modify_cq,
 	.modify_device = mlx5_ib_modify_device,
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 33b0d042ef05..7ef1c0a3c886 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -567,6 +567,9 @@ struct mlx5_ib_mr {
 	void *descs;
 	dma_addr_t desc_map;
 	int ndescs;
+	int data_length;
+	int meta_ndescs;
+	int meta_length;
 	int max_descs;
 	int desc_size;
 	int access_mode;
@@ -585,6 +588,7 @@ struct mlx5_ib_mr {
 	int access_flags; /* Needed for rereg MR */
 
 	struct mlx5_ib_mr *parent;
+	struct mlx5_ib_mr *pi_mr; /* Needed for IB_MR_TYPE_PI type */
 	atomic_t num_leaf_free;
 	wait_queue_head_t q_leaf_free;
 };
@@ -1107,7 +1111,12 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 			       enum ib_mr_type mr_type,
 			       u32 max_num_sg);
+struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
+					 u32 max_num_sg,
+					 u32 max_num_meta_sg);
 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct ib_scatterlist *ib_sg);
+int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct ib_scatterlist *data_ib_sg,
+			 struct ib_scatterlist *meta_ib_sg);
 int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
 			const struct ib_mad_hdr *in, size_t in_mad_size,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 659d39734523..b17e7078bdc4 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1684,17 +1684,22 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
 {
-	dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr));
+	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
+
+	if (ibmr->type == IB_MR_TYPE_PI)
+		dereg_mr(to_mdev(mmr->pi_mr->ibmr.device), mmr->pi_mr);
+
+	dereg_mr(to_mdev(ibmr->device), mmr);
+
 	return 0;
 }
 
-struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
-			       enum ib_mr_type mr_type,
-			       u32 max_num_sg)
+static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
+				u32 max_num_sg, u32 max_num_meta_sg)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
-	int ndescs = ALIGN(max_num_sg, 4);
+	int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
 	struct mlx5_ib_mr *mr;
 	void *mkc;
 	u32 *in;
@@ -1716,8 +1721,72 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
 	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
 
+	mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
+
+	err = mlx5_alloc_priv_descs(pd->device, mr,
+				    ndescs, sizeof(struct mlx5_klm));
+	if (err)
+		goto err_free_in;
+	mr->desc_size = sizeof(struct mlx5_klm);
+	mr->max_descs = ndescs;
+
+	MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3);
+	MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7);
+	MLX5_SET(mkc, mkc, umr_en, 1);
+
+	mr->ibmr.pd = pd;
+	mr->ibmr.device = pd->device;
+	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
+	if (err)
+		goto err_priv_descs;
+
+	mr->mmkey.type = MLX5_MKEY_MR;
+	mr->ibmr.lkey = mr->mmkey.key;
+	mr->ibmr.rkey = mr->mmkey.key;
+	mr->umem = NULL;
+	kfree(in);
+
+	return mr;
+
+err_priv_descs:
+	mlx5_free_priv_descs(mr);
+err_free_in:
+	kfree(in);
+err_free:
+	kfree(mr);
+	return ERR_PTR(err);
+}
+
+static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
+					enum ib_mr_type mr_type,
+					u32 max_num_sg, u32 max_num_meta_sg)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+	int ndescs = ALIGN(max_num_sg, 4);
+	struct mlx5_ib_mr *mr;
+	void *mkc;
+	u32 *in;
+	int err;
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	in = kzalloc(inlen, GFP_KERNEL);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+	MLX5_SET(mkc, mkc, free, 1);
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+
 	if (mr_type == IB_MR_TYPE_MEM_REG) {
 		mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+		MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
 		MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
 		err = mlx5_alloc_priv_descs(pd->device, mr,
 					    ndescs, sizeof(struct mlx5_mtt));
@@ -1728,6 +1797,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 		mr->max_descs = ndescs;
 	} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
 		mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
+		MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
 
 		err = mlx5_alloc_priv_descs(pd->device, mr,
 					    ndescs, sizeof(struct mlx5_klm));
@@ -1735,11 +1805,13 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 			goto err_free_in;
 		mr->desc_size = sizeof(struct mlx5_klm);
 		mr->max_descs = ndescs;
-	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
+	} else if (mr_type == IB_MR_TYPE_SIGNATURE ||
+		   mr_type == IB_MR_TYPE_PI) {
 		u32 psv_index[2];
 
 		MLX5_SET(mkc, mkc, bsf_en, 1);
 		MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
+		MLX5_SET(mkc, mkc, translations_octword_size, 4);
 		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
 		if (!mr->sig) {
 			err = -ENOMEM;
@@ -1760,6 +1832,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 		mr->sig->sig_err_exists = false;
 		/* Next UMR, Arm SIGERR */
 		++mr->sig->sigerr_count;
+		if (mr_type == IB_MR_TYPE_PI) {
+			mr->pi_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg,
+							max_num_meta_sg);
+			if (IS_ERR(mr->pi_mr)) {
+				err = PTR_ERR(mr->pi_mr);
+				goto err_destroy_psv;
+			}
+		}
 	} else {
 		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
 		err = -EINVAL;
@@ -1773,7 +1853,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 	mr->ibmr.device = pd->device;
 	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
 	if (err)
-		goto err_destroy_psv;
+		goto err_free_pi_mr;
 
 	mr->mmkey.type = MLX5_MKEY_MR;
 	mr->ibmr.lkey = mr->mmkey.key;
@@ -1783,6 +1863,11 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 
 	return &mr->ibmr;
 
+err_free_pi_mr:
+	if (mr->pi_mr) {
+		dereg_mr(to_mdev(mr->pi_mr->ibmr.device), mr->pi_mr);
+		mr->pi_mr = NULL;
+	}
 err_destroy_psv:
 	if (mr->sig) {
 		if (mlx5_core_destroy_psv(dev->mdev,
@@ -1804,6 +1889,20 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 	return ERR_PTR(err);
 }
 
+struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
+			       enum ib_mr_type mr_type,
+			       u32 max_num_sg)
+{
+	return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
+}
+
+struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
+					 u32 max_num_sg, u32 max_num_meta_sg)
+{
+	return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_PI, max_num_sg,
+				  max_num_meta_sg);
+}
+
 struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
 			       struct ib_udata *udata)
 {
@@ -1934,18 +2033,19 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 
 static int
 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
-		   struct ib_scatterlist *ib_sgl)
+		   struct ib_scatterlist *data_ib_sgl,
+		   struct ib_scatterlist *meta_ib_sgl)
 {
-	struct scatterlist *sg = ib_sgl->sg;
+	struct scatterlist *sg = data_ib_sgl->sg;
 	struct mlx5_klm *klms = mr->descs;
-	unsigned int sg_offset = ib_sgl->offset;
+	unsigned int sg_offset = data_ib_sgl->offset;
 	u32 lkey = mr->ibmr.pd->local_dma_lkey;
-	int i;
+	int i, j = 0;
 
 	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
 	mr->ibmr.length = 0;
 
-	for_each_sg(ib_sgl->sg, sg, ib_sgl->dma_nents, i) {
+	for_each_sg(data_ib_sgl->sg, sg, data_ib_sgl->dma_nents, i) {
 		if (unlikely(i >= mr->max_descs))
 			break;
 		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
@@ -1955,11 +2055,34 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
 
 		sg_offset = 0;
 	}
+
+	data_ib_sgl->offset = sg_offset;
+
 	mr->ndescs = i;
+	mr->data_length = mr->ibmr.length;
+
+	if (meta_ib_sgl && meta_ib_sgl->dma_nents) {
+		sg = meta_ib_sgl->sg;
+		sg_offset = meta_ib_sgl->offset;
+		for_each_sg(meta_ib_sgl->sg, sg, meta_ib_sgl->dma_nents, j) {
+			if (unlikely(i + j >= mr->max_descs))
+				break;
+			klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
+						     sg_offset);
+			klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
+							 sg_offset);
+			klms[i + j].key = cpu_to_be32(lkey);
+			mr->ibmr.length += sg_dma_len(sg) - sg_offset;
+
+			sg_offset = 0;
+		}
+		meta_ib_sgl->offset = sg_offset;
 
-	ib_sgl->offset = sg_offset;
+		mr->meta_ndescs = j;
+		mr->meta_length = mr->ibmr.length - mr->data_length;
+	}
 
-	return i;
+	return i + j;
 }
 
 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
@@ -1976,6 +2099,36 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
 	return 0;
 }
 
+int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct ib_scatterlist *data_ib_sg,
+			 struct ib_scatterlist *meta_ib_sg)
+{
+	struct mlx5_ib_mr *mr = to_mmr(ibmr);
+	struct mlx5_ib_mr *pi_mr = mr->pi_mr;
+	int n;
+
+	WARN_ON(ibmr->type != IB_MR_TYPE_PI);
+
+	pi_mr->ndescs = 0;
+	pi_mr->meta_ndescs = 0;
+
+	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
+				   pi_mr->desc_size * pi_mr->max_descs,
+				   DMA_TO_DEVICE);
+
+	n = mlx5_ib_sg_to_klms(pi_mr, data_ib_sg, meta_ib_sg);
+
+	/* This is zero-based memory region */
+	pi_mr->ibmr.iova = 0;
+	ibmr->length = pi_mr->ibmr.length;
+	ibmr->iova = pi_mr->ibmr.iova;
+
+	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
+				      pi_mr->desc_size * pi_mr->max_descs,
+				      DMA_TO_DEVICE);
+
+	return n;
+}
+
 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct ib_scatterlist *ib_sg)
 {
 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
@@ -1988,7 +2141,7 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct ib_scatterlist *ib_sg)
 				DMA_TO_DEVICE);
 
 	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
-		n = mlx5_ib_sg_to_klms(mr, ib_sg);
+		n = mlx5_ib_sg_to_klms(mr, ib_sg, NULL);
 	else
 		n = ib_sg_to_pages(ibmr, ib_sg, mlx5_set_page);
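To make the KLM packing in mlx5_ib_sg_to_klms() concrete, a worked example with made-up numbers: two 4K data segments plus one 16-byte PI segment (8 bytes of DIF per 4K block) would leave the internal pi_mr in the following state.

	/* Illustration only (hypothetical values): one contiguous KLM
	 * array, data entries first, PI entries appended after them.
	 *
	 *   klms[0] = { va = data_seg0, bcount = 4096, key = local_dma_lkey }
	 *   klms[1] = { va = data_seg1, bcount = 4096, key = local_dma_lkey }
	 *   klms[2] = { va = pi_seg0,   bcount = 16,   key = local_dma_lkey }
	 *
	 * mr->ndescs = 2, mr->meta_ndescs = 1,
	 * mr->data_length = 8192, mr->meta_length = 16,
	 * and mlx5_ib_sg_to_klms() returns i + j = 3.
	 *
	 * mlx5_ib_map_mr_sg_pi() then makes the region zero-based, so
	 * the memory domain behind the single exposed key is data bytes
	 * [0, 8192) followed by PI bytes [8192, 8208).
	 */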