From patchwork Mon Sep 17 13:55:13 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Lijun Ou X-Patchwork-Id: 10602751 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 1EF0E161F for ; Mon, 17 Sep 2018 13:18:43 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 11D5F29C0D for ; Mon, 17 Sep 2018 13:18:43 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 0660529C21; Mon, 17 Sep 2018 13:18:43 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 7EEE529C0D for ; Mon, 17 Sep 2018 13:18:42 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728079AbeIQSp7 (ORCPT ); Mon, 17 Sep 2018 14:45:59 -0400 Received: from szxga07-in.huawei.com ([45.249.212.35]:53690 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728095AbeIQSp7 (ORCPT ); Mon, 17 Sep 2018 14:45:59 -0400 Received: from DGGEMS402-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id AD4C63BF5A9DF; Mon, 17 Sep 2018 21:18:36 +0800 (CST) Received: from linux-ioko.site (10.71.200.31) by DGGEMS402-HUB.china.huawei.com (10.3.19.202) with Microsoft SMTP Server id 14.3.399.0; Mon, 17 Sep 2018 21:18:32 +0800 From: Lijun Ou To: , CC: , Subject: [PATCH V2 for-next 2/4] RDMA/hns: Add atomic support Date: Mon, 17 Sep 2018 21:55:13 +0800 Message-ID: <1537192515-114922-3-git-send-email-oulijun@huawei.com> X-Mailer: git-send-email 1.9.1 In-Reply-To: <1537192515-114922-1-git-send-email-oulijun@huawei.com> References: <1537192515-114922-1-git-send-email-oulijun@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.71.200.31] X-CFilter-Loop: Reflected Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP This patch adds atomic operations for hip08, includes fetchadd and cmpswap operation. In order to enable atomic, the driver needs to do the following step: 1. Enable the atomic caps for RoCE device 2. Post the wqe context of atomic type 3. Configure the atomic type of mtpt Signed-off-by: Lijun Ou --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 39 +++++++++++++++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 5 ++++ drivers/infiniband/hw/hns/hns_roce_main.c | 3 ++- 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 9a24fd0..6dadb12 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -193,6 +193,7 @@ enum { HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), + HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), }; enum hns_roce_mtt_type { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4cc3fbf..b8e515f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -54,6 +54,18 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, dseg->len = cpu_to_le32(sg->length); } +static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg, + const struct ib_atomic_wr *wr) +{ + if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->fetchadd_swap_data = cpu_to_le64(wr->swap); + aseg->cmp_data = cpu_to_le64(wr->compare_add); + } else { + aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add); + aseg->cmp_data = 0; + } +} + static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, unsigned int *sge_ind) { @@ -179,6 +191,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct hns_roce_v2_ud_send_wqe *ud_sq_wqe; struct hns_roce_v2_rc_send_wqe *rc_sq_wqe; struct hns_roce_qp *qp = to_hr_qp(ibqp); + struct hns_roce_v2_wqe_data_seg *dseg; struct device *dev = hr_dev->dev; struct hns_roce_v2_db sq_db; struct ib_qp_attr attr; @@ -407,6 +420,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit); + wqe += sizeof(struct hns_roce_v2_rc_send_wqe); switch (wr->opcode) { case IB_WR_RDMA_READ: hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ; @@ -443,9 +457,21 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, break; case IB_WR_ATOMIC_CMP_AND_SWP: hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP; + rc_sq_wqe->rkey = + cpu_to_le32(atomic_wr(wr)->rkey); + rc_sq_wqe->va = + cpu_to_le32(atomic_wr(wr)->remote_addr); + wqe += sizeof(struct hns_roce_v2_wqe_data_seg); + set_atomic_seg(wqe, atomic_wr(wr)); break; case IB_WR_ATOMIC_FETCH_AND_ADD: hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD; + rc_sq_wqe->rkey = + cpu_to_le32(atomic_wr(wr)->rkey); + rc_sq_wqe->va = + cpu_to_le32(atomic_wr(wr)->remote_addr); + wqe += sizeof(struct hns_roce_v2_wqe_data_seg); + set_atomic_seg(wqe, atomic_wr(wr)); break; case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: hr_op = @@ -463,7 +489,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op); - wqe += sizeof(struct hns_roce_v2_rc_send_wqe); + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + dseg = + wqe - sizeof(struct hns_roce_v2_wqe_data_seg); + else + dseg = wqe; ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe, &sge_ind, bad_wr); @@ -1232,6 +1263,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->local_ca_ack_delay = 0; caps->max_mtu = IB_MTU_4096; + if (hr_dev->pci_dev->revision == 0x21) + caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC; + ret = hns_roce_v2_set_bt(hr_dev); if (ret) dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n", @@ -1663,7 +1697,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 0); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S, (mr->access & IB_ACCESS_MW_BIND ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, 0); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, + mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 14aa308..6f92722 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1564,4 +1564,9 @@ struct hns_roce_eq_context { #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0 #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0) +struct hns_roce_wqe_atomic_seg { + __le64 fetchadd_swap_data; + __le64 cmp_data; +}; + #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index c5cae9a..1306d79 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -215,7 +215,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev, props->max_pd = hr_dev->caps.num_pds; props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma; props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma; - props->atomic_cap = IB_ATOMIC_NONE; + props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ? + IB_ATOMIC_HCA : IB_ATOMIC_NONE; props->max_pkeys = 1; props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;