From patchwork Mon Jun 8 13:15:36 2015
X-Patchwork-Submitter: Sagi Grimberg
X-Patchwork-Id: 6565041
From: Sagi Grimberg
To: Doug Ledford
Cc: linux-rdma@vger.kernel.org, Or Gerlitz, Eli Cohen, Oren Duer,
    Sagi Grimberg
Subject: [PATCH 2/5] IB/mlx5: Implement Fast Indirect Memory Registration Feature
Date: Mon, 8 Jun 2015 16:15:36 +0300
Message-Id: <1433769339-949-3-git-send-email-sagig@mellanox.com>
In-Reply-To: <1433769339-949-1-git-send-email-sagig@mellanox.com>
References: <1433769339-949-1-git-send-email-sagig@mellanox.com>
X-Mailing-List: linux-rdma@vger.kernel.org

This patch implements:
- ib_alloc/free_indir_reg_list() routines
- ib_create_mr() extension for IB_MR_INDIRECT_REG
- ib_post_send() extension for IB_WR_REG_INDIR_MR and
  work completion of IB_WC_REG_INDIR_MR
- Expose mlx5 indirect registration device capabilities

Signed-off-by: Sagi Grimberg
---
 drivers/infiniband/hw/mlx5/cq.c      |   2 +
 drivers/infiniband/hw/mlx5/main.c    |   4 +
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  19 ++++++
 drivers/infiniband/hw/mlx5/mr.c      |  66 +++++++++++++++++++++
 drivers/infiniband/hw/mlx5/qp.c      | 106 ++++++++++++++++++++++++++++++++++
 5 files changed, 197 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 2ee6b10..43495c6 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -111,6 +111,8 @@ static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
 	case IB_WR_FAST_REG_MR:
 		return IB_WC_FAST_REG_MR;
 
+	case IB_WR_REG_INDIR_MR:
+		return IB_WC_REG_INDIR_MR;
 	default:
 		pr_warn("unknown completion status\n");
 		return 0;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 582bfd9..47a3d76 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -107,6 +107,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	if (flags & MLX5_DEV_CAP_FLAG_XRC)
 		props->device_cap_flags |= IB_DEVICE_XRC;
 	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+	props->device_cap_flags |= IB_DEVICE_INDIR_REGISTRATION;
 	if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) {
 		props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
 		/* At this stage no support for signature handover */
@@ -145,6 +146,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
 	props->max_srq_sge	   = max_rq_sg - 1;
 	props->max_fast_reg_page_list_len = (unsigned int)-1;
+	props->max_indir_reg_mr_list_len = 1 << gen->log_max_klm_list_size;
 	props->local_ca_ack_delay  = gen->local_ca_ack_delay;
 	props->atomic_cap	   = IB_ATOMIC_NONE;
 	props->masked_atomic_cap   = IB_ATOMIC_NONE;
@@ -1302,6 +1304,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
 	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
+	dev->ib_dev.alloc_indir_reg_list = mlx5_ib_alloc_indir_reg_list;
+	dev->ib_dev.free_indir_reg_list  = mlx5_ib_free_indir_reg_list;
 
 	mlx5_ib_internal_query_odp_caps(dev);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d8e07c1..68d8865 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -334,6 +334,13 @@ struct mlx5_ib_fast_reg_page_list {
 	dma_addr_t			map;
 };
 
+struct mlx5_ib_indir_reg_list {
+	struct ib_indir_reg_list	ib_irl;
+	void				*mapped_ilist;
+	struct mlx5_klm			*klms;
+	dma_addr_t			map;
+};
+
 struct mlx5_ib_umr_context {
 	enum ib_wc_status	status;
 	struct completion	done;
@@ -508,6 +515,12 @@ static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_pag
 	return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl);
 }
 
+static inline struct mlx5_ib_indir_reg_list *
+to_mindir_list(struct ib_indir_reg_list *ib_irl)
+{
+	return container_of(ib_irl, struct mlx5_ib_indir_reg_list, ib_irl);
+}
+
 struct mlx5_ib_ah {
 	struct ib_ah		ibah;
 	struct mlx5_av		av;
@@ -578,6 +591,12 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
 struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
 							       int page_list_len);
 void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
+
+struct ib_indir_reg_list *
+mlx5_ib_alloc_indir_reg_list(struct ib_device *device,
+			     unsigned int max_indir_list_len);
+void mlx5_ib_free_indir_reg_list(struct ib_indir_reg_list *indir_list);
+
 struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc,
 				 struct ib_fmr_attr *fmr_attr);
 int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 04b6787..25c7583 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1300,6 +1300,9 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
 		++mr->sig->sigerr_count;
 	}
 
+	if (mr_init_attr->flags & IB_MR_INDIRECT_REG)
+		access_mode = MLX5_ACCESS_MODE_KLM;
+
 	in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
 	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr,
 				    in, sizeof(*in), NULL, NULL, NULL);
@@ -1459,3 +1462,66 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 done:
 	return ret;
 }
+
+struct ib_indir_reg_list *
+mlx5_ib_alloc_indir_reg_list(struct ib_device *device,
+			     unsigned int max_indir_list_len)
+{
+	struct device *ddev = device->dma_device;
+	struct mlx5_ib_indir_reg_list *mirl = NULL;
+	int dsize;
+	int err;
+
+	mirl = kzalloc(sizeof(*mirl), GFP_KERNEL);
+	if (!mirl)
+		return ERR_PTR(-ENOMEM);
+
+	mirl->ib_irl.sg_list = kcalloc(max_indir_list_len,
+				       sizeof(*mirl->ib_irl.sg_list),
+				       GFP_KERNEL);
+	if (!mirl->ib_irl.sg_list) {
+		err = -ENOMEM;
+		goto err_sg_list;
+	}
+
+	dsize = sizeof(*mirl->klms) * max_indir_list_len;
+	dsize += max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+	mirl->mapped_ilist = kzalloc(dsize, GFP_KERNEL);
+	if (!mirl->mapped_ilist) {
+		err = -ENOMEM;
+		goto err_mapped_list;
+	}
+
+	mirl->klms = (void *)ALIGN((uintptr_t)mirl->mapped_ilist,
+				   MLX5_UMR_ALIGN);
+	mirl->map = dma_map_single(ddev, mirl->klms,
+				   dsize, DMA_TO_DEVICE);
+	if (dma_mapping_error(ddev, mirl->map)) {
+		err = -ENOMEM;
+		goto err_dma_map;
+	}
+
+	return &mirl->ib_irl;
+err_dma_map:
+	kfree(mirl->mapped_ilist);
+err_mapped_list:
+	kfree(mirl->ib_irl.sg_list);
+err_sg_list:
+	kfree(mirl);
+
+	return ERR_PTR(err);
+}
+
+void
+mlx5_ib_free_indir_reg_list(struct ib_indir_reg_list *indir_list)
+{
+	struct mlx5_ib_indir_reg_list *mirl = to_mindir_list(indir_list);
+	struct device *ddev = indir_list->device->dma_device;
+	int dsize;
+
+	dsize = sizeof(*mirl->klms) * indir_list->max_indir_list_len;
+	dma_unmap_single(ddev, mirl->map, dsize, DMA_TO_DEVICE);
+	kfree(mirl->mapped_ilist);
+	kfree(mirl->ib_irl.sg_list);
+	kfree(mirl);
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index d35f62d..64b969b 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -65,6 +65,7 @@ static const u32 mlx5_ib_opcode[] = {
 	[IB_WR_SEND_WITH_INV]			= MLX5_OPCODE_SEND_INVAL,
 	[IB_WR_LOCAL_INV]			= MLX5_OPCODE_UMR,
 	[IB_WR_FAST_REG_MR]			= MLX5_OPCODE_UMR,
+	[IB_WR_REG_INDIR_MR]			= MLX5_OPCODE_UMR,
 	[IB_WR_MASKED_ATOMIC_CMP_AND_SWP]	= MLX5_OPCODE_ATOMIC_MASKED_CS,
 	[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]	= MLX5_OPCODE_ATOMIC_MASKED_FA,
 	[MLX5_IB_WR_UMR]			= MLX5_OPCODE_UMR,
@@ -2477,6 +2478,98 @@ static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
 	return 0;
 }
 
+static void set_indir_mkey_segment(struct mlx5_mkey_seg *seg,
+				   struct ib_send_wr *wr, u32 pdn)
+{
+	u32 list_len = wr->wr.indir_reg.indir_list_len;
+
+	memset(seg, 0, sizeof(*seg));
+
+	seg->flags = get_umr_flags(wr->wr.indir_reg.access_flags) |
+				   MLX5_ACCESS_MODE_KLM;
+	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
+				       mlx5_mkey_variant(wr->wr.indir_reg.mkey));
+	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | pdn);
+	seg->len = cpu_to_be64(wr->wr.indir_reg.length);
+	seg->start_addr = cpu_to_be64(wr->wr.indir_reg.iova_start);
+	seg->xlt_oct_size =
+		cpu_to_be32(be16_to_cpu(get_klm_octo(list_len * 2)));
+}
+
+static void set_indir_data_seg(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+			       u32 pa_key, void **seg, int *size)
+{
+	struct mlx5_wqe_data_seg *data = *seg;
+	struct mlx5_ib_indir_reg_list *mirl;
+	struct ib_sge *sg_list = wr->wr.indir_reg.indir_list->sg_list;
+	u32 list_len = wr->wr.indir_reg.indir_list_len;
+	int i;
+
+	mirl = to_mindir_list(wr->wr.indir_reg.indir_list);
+	for (i = 0; i < list_len; i++) {
+		mirl->klms[i].va = cpu_to_be64(sg_list[i].addr);
+		mirl->klms[i].key = cpu_to_be32(sg_list[i].lkey);
+		mirl->klms[i].bcount = cpu_to_be32(sg_list[i].length);
+	}
+
+	data->byte_count = cpu_to_be32(ALIGN(sizeof(struct mlx5_klm) *
+				       list_len, 64));
+	data->lkey = cpu_to_be32(pa_key);
+	data->addr = cpu_to_be64(mirl->map);
+	*seg += sizeof(*data);
+	*size += sizeof(*data) / 16;
+}
+
+static void set_indir_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+				  struct ib_send_wr *wr)
+{
+	u64 mask;
+	u32 list_len = wr->wr.indir_reg.indir_list_len;
+
+	memset(umr, 0, sizeof(*umr));
+
+	umr->flags = MLX5_UMR_CHECK_NOT_FREE;
+	umr->klm_octowords = get_klm_octo(list_len * 2);
+	mask = MLX5_MKEY_MASK_LEN		|
+		MLX5_MKEY_MASK_PAGE_SIZE	|
+		MLX5_MKEY_MASK_START_ADDR	|
+		MLX5_MKEY_MASK_EN_RINVAL	|
+		MLX5_MKEY_MASK_KEY		|
+		MLX5_MKEY_MASK_LR		|
+		MLX5_MKEY_MASK_LW		|
+		MLX5_MKEY_MASK_RR		|
+		MLX5_MKEY_MASK_RW		|
+		MLX5_MKEY_MASK_A		|
+		MLX5_MKEY_MASK_FREE;
+
+	umr->mkey_mask = cpu_to_be64(mask);
+}
+
+static int set_indir_reg_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+			    void **seg, int *size)
+{
+	struct mlx5_ib_pd *pd = get_pd(qp);
+
+	if (unlikely(wr->send_flags & IB_SEND_INLINE))
+		return -EINVAL;
+
+	set_indir_umr_segment(*seg, wr);
+	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+	if (unlikely(*seg == qp->sq.qend))
+		*seg = mlx5_get_send_wqe(qp, 0);
+
+	set_indir_mkey_segment(*seg, wr, pd->pdn);
+	*seg += sizeof(struct mlx5_mkey_seg);
+	*size += sizeof(struct mlx5_mkey_seg) / 16;
+	if (unlikely(*seg == qp->sq.qend))
+		*seg = mlx5_get_send_wqe(qp, 0);
+
+	set_indir_data_seg(wr, qp, pd->pa_lkey, seg, size);
+
+	return 0;
+}
+
 static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
 {
 	__be32 *p = NULL;
@@ -2688,6 +2781,19 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			num_sge = 0;
 			break;
 
+		case IB_WR_REG_INDIR_MR:
+			next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+			qp->sq.wr_data[idx] = IB_WR_REG_INDIR_MR;
+			ctrl->imm = cpu_to_be32(wr->wr.indir_reg.mkey);
+			err = set_indir_reg_wr(wr, qp, &seg, &size);
+			if (err) {
+				mlx5_ib_warn(dev, "Failed to set indir_reg wqe\n");
+				*bad_wr = wr;
+				goto out;
+			}
+			num_sge = 0;
+			break;
+
 		case IB_WR_REG_SIG_MR:
 			qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
 			mr = to_mmr(wr->wr.sig_handover.sig_mr);
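
For readers who want to see how these verbs would fit together, here is a
minimal, hypothetical consumer sketch. It is inferred from the fields this
driver patch dereferences (wr.indir_reg.*, the ib_indir_reg_list sg_list
array, the IB_MR_INDIRECT_REG flag and the work completion opcode); the core
entry points ib_create_mr(), ib_alloc_indir_reg_list() and
ib_free_indir_reg_list(), as well as the exact layout of struct
ib_mr_init_attr, belong to patch 1/5 of this series and are assumptions
here, not something this driver patch defines:

#include <linux/err.h>
#include <rdma/ib_verbs.h>

/*
 * Hypothetical example (not part of this patch): register a list of
 * arbitrarily aligned fragments under a single indirect mkey, so the
 * whole scattered region can later be addressed with one key.
 */
static int example_indir_reg(struct ib_pd *pd, struct ib_qp *qp,
			     struct ib_sge *frags, unsigned int nfrags,
			     u64 iova, u64 total_len)
{
	struct ib_mr_init_attr mr_attr = {
		.flags = IB_MR_INDIRECT_REG,	/* request a KLM-based mkey */
		.max_reg_descriptors = nfrags,	/* assumed sizing field */
	};
	struct ib_indir_reg_list *irl;
	struct ib_send_wr wr, *bad_wr;
	struct ib_mr *mr;
	unsigned int i;
	int err;

	mr = ib_create_mr(pd, &mr_attr);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	irl = ib_alloc_indir_reg_list(qp->device, nfrags);
	if (IS_ERR(irl)) {
		err = PTR_ERR(irl);
		goto free_mr;
	}

	/* Each ib_sge describes one fragment: address, lkey, byte count. */
	for (i = 0; i < nfrags; i++)
		irl->sg_list[i] = frags[i];

	memset(&wr, 0, sizeof(wr));
	wr.opcode = IB_WR_REG_INDIR_MR;
	wr.send_flags = IB_SEND_SIGNALED;
	wr.wr.indir_reg.mkey = mr->lkey;	/* key to bind (assumed lkey) */
	wr.wr.indir_reg.iova_start = iova;
	wr.wr.indir_reg.length = total_len;
	wr.wr.indir_reg.indir_list = irl;
	wr.wr.indir_reg.indir_list_len = nfrags;
	wr.wr.indir_reg.access_flags = IB_ACCESS_LOCAL_WRITE |
				       IB_ACCESS_REMOTE_READ |
				       IB_ACCESS_REMOTE_WRITE;

	err = ib_post_send(qp, &wr, &bad_wr);
	if (err)
		goto free_list;

	/* ... poll the CQ for IB_WC_REG_INDIR_MR, then use the mkey ... */
	return 0;

free_list:
	ib_free_indir_reg_list(irl);
free_mr:
	ib_dereg_mr(mr);
	return err;
}

The HCA consumes the KLM list through the data segment that
set_indir_data_seg() builds above, so indir_list_len must stay within the
max_indir_reg_mr_list_len the device reports in its capabilities.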