[v5,for-next,2/2] RDMA/hns: Delayed flush cqe process with workqueue

Message ID	1577503735-26685-3-git-send-email-liuyixian@huawei.com (mailing list archive)
State	Superseded
Delegated to:	Jason Gunthorpe
Headers	show Return-Path: <SRS0=Q7jl=2S=vger.kernel.org=linux-rdma-owner@kernel.org> From: Yixian Liu <liuyixian@huawei.com> To: <dledford@redhat.com>, <jgg@ziepe.ca>, <leon@kernel.org> CC: <linux-rdma@vger.kernel.org>, <linuxarm@huawei.com> Subject: [PATCH v5 for-next 2/2] RDMA/hns: Delayed flush cqe process with workqueue Date: Sat, 28 Dec 2019 11:28:55 +0800 Message-ID: <1577503735-26685-3-git-send-email-liuyixian@huawei.com> In-Reply-To: <1577503735-26685-1-git-send-email-liuyixian@huawei.com> References: <1577503735-26685-1-git-send-email-liuyixian@huawei.com> MIME-Version: 1.0 Content-Type: text/plain Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk
Series	RDMA/hns: Add the workqueue framework for flush cqe handler \| expand [v5,for-next,0/2] RDMA/hns: Add the workqueue framework for flush cqe handler [v5,for-next,2/2] RDMA/hns: Delayed flush cqe process with workqueue

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a87a838..0ba2387 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -667,6 +667,8 @@ struct hns_roce_qp { u8 sl; u8 resp_depth; u8 state; + /* 1: PI is being pushed, 0: PI is not being pushed */ + u8 being_pushed; u32 access_flags; u32 atomic_rd_en; u32 pkey_index; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 2afcedd..2e8ce21 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -221,11 +221,6 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, return 0; } -static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, - int attr_mask, enum ib_qp_state cur_state, - enum ib_qp_state new_state); - static int hns_roce_v2_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) @@ -238,14 +233,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct hns_roce_wqe_frmr_seg *fseg; struct device *dev = hr_dev->dev; struct hns_roce_v2_db sq_db; - struct ib_qp_attr attr; unsigned int sge_ind; unsigned int owner_bit; unsigned long flags; unsigned int ind; void *wqe = NULL; bool loopback; - int attr_mask; u32 tmp_len; int ret = 0; u32 hr_op; @@ -591,18 +584,17 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, qp->sq_next_wqe = ind; qp->next_sge = sge_ind; - if (qp->state == IB_QPS_ERR) { - attr_mask = IB_QP_STATE; - attr.qp_state = IB_QPS_ERR; - - ret = hns_roce_v2_modify_qp(&qp->ibqp, &attr, attr_mask, - qp->state, IB_QPS_ERR); - if (ret) { - spin_unlock_irqrestore(&qp->sq.lock, flags); - *bad_wr = wr; - return ret; - } - } + /* + * Hip08 hardware cannot flush the WQEs in SQ if the QP state + * gets into errored mode. Hence, as a workaround to this + * hardware limitation, driver needs to assist in flushing. But + * the flushing operation uses mailbox to convey the QP state to + * the hardware and which can sleep due to the mutex protection + * around the mailbox calls. Hence, use the deferred flush for + * now. + */ + if (qp->state == IB_QPS_ERR && !qp->being_pushed) + init_flush_work(hr_dev, qp); } spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -619,10 +611,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_rinl_sge *sge_list; struct device *dev = hr_dev->dev; - struct ib_qp_attr attr; unsigned long flags; void *wqe = NULL; - int attr_mask; int ret = 0; int nreq; int ind; @@ -692,19 +682,17 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff; - if (hr_qp->state == IB_QPS_ERR) { - attr_mask = IB_QP_STATE; - attr.qp_state = IB_QPS_ERR; - - ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, - attr_mask, hr_qp->state, - IB_QPS_ERR); - if (ret) { - spin_unlock_irqrestore(&hr_qp->rq.lock, flags); - *bad_wr = wr; - return ret; - } - } + /* + * Hip08 hardware cannot flush the WQEs in RQ if the QP state + * gets into errored mode. Hence, as a workaround to this + * hardware limitation, driver needs to assist in flushing. But + * the flushing operation uses mailbox to convey the QP state to + * the hardware and which can sleep due to the mutex protection + * around the mailbox calls. Hence, use the deferred flush for + * now. + */ + if (hr_qp->state == IB_QPS_ERR && !hr_qp->being_pushed) + init_flush_work(hr_dev, hr_qp); } spin_unlock_irqrestore(&hr_qp->rq.lock, flags); @@ -2690,13 +2678,11 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, struct hns_roce_qp **cur_qp, struct ib_wc *wc) { + struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device); struct hns_roce_srq *srq = NULL; - struct hns_roce_dev *hr_dev; struct hns_roce_v2_cqe *cqe; struct hns_roce_qp *hr_qp; struct hns_roce_wq *wq; - struct ib_qp_attr attr; - int attr_mask; int is_send; u16 wqe_ctr; u32 opcode; @@ -2720,7 +2706,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, V2_CQE_BYTE_16_LCL_QPN_S); if (!*cur_qp || (qpn & HNS_ROCE_V2_CQE_QPN_MASK) != (*cur_qp)->qpn) { - hr_dev = to_hr_dev(hr_cq->ib_cq.device); hr_qp = __hns_roce_qp_lookup(hr_dev, qpn); if (unlikely(!hr_qp)) { dev_err(hr_dev->dev, "CQ %06lx with entry for unknown QPN %06x\n", @@ -2814,14 +2799,22 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, break; } - /* flush cqe if wc status is error, excluding flush error */ - if ((wc->status != IB_WC_SUCCESS) && - (wc->status != IB_WC_WR_FLUSH_ERR)) { - attr_mask = IB_QP_STATE; - attr.qp_state = IB_QPS_ERR; - return hns_roce_v2_modify_qp(&(*cur_qp)->ibqp, - &attr, attr_mask, - (*cur_qp)->state, IB_QPS_ERR); + /* + * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets + * into errored mode. Hence, as a workaround to this hardware + * limitation, driver needs to assist in flushing. But the flushing + * operation uses mailbox to convey the QP state to the hardware and + * which can sleep due to the mutex protection around the mailbox calls. + * Hence, use the deferred flush for now. Once wc error detected, the + * flushing operation is needed. + */ + if (wc->status != IB_WC_SUCCESS && + wc->status != IB_WC_WR_FLUSH_ERR && + !(*cur_qp)->being_pushed) { + dev_err(hr_dev->dev, "error cqe status is: 0x%x\n", + status & HNS_ROCE_V2_CQE_STATUS_MASK); + init_flush_work(hr_dev, *cur_qp); + return 0; } if (wc->status == IB_WC_WR_FLUSH_ERR) @@ -4389,6 +4382,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, struct hns_roce_v2_qp_context *context = ctx; struct hns_roce_v2_qp_context *qpc_mask = ctx + 1; struct device *dev = hr_dev->dev; + unsigned long sq_flags = 0; + unsigned long rq_flags = 0; int ret; /* @@ -4406,6 +4401,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, /* When QP state is err, SQ and RQ WQE should be flushed */ if (new_state == IB_QPS_ERR) { + spin_lock_irqsave(&hr_qp->sq.lock, sq_flags); roce_set_field(context->byte_160_sq_ci_pi, V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, @@ -4413,8 +4409,12 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_160_sq_ci_pi, V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0); + hr_qp->state = IB_QPS_ERR; + hr_qp->being_pushed = 0; + spin_unlock_irqrestore(&hr_qp->sq.lock, sq_flags); if (!ibqp->srq) { + spin_lock_irqsave(&hr_qp->rq.lock, rq_flags); roce_set_field(context->byte_84_rq_ci_pi, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, @@ -4422,6 +4422,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_84_rq_ci_pi, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0); + spin_unlock_irqrestore(&hr_qp->rq.lock, rq_flags); } } @@ -4466,6 +4467,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, hr_qp->sq.tail = 0; hr_qp->sq_next_wqe = 0; hr_qp->next_sge = 0; + hr_qp->being_pushed = 0; if (hr_qp->rq.wqe_cnt) *hr_qp->rdb.db_record = 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 0c1e74a..a30f86c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -79,6 +79,7 @@ void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if (!flush_work) return; + hr_qp->being_pushed = 1; flush_work->hr_dev = hr_dev; flush_work->hr_qp = hr_qp; INIT_WORK(&flush_work->work, flush_work_handle); @@ -748,6 +749,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, spin_lock_init(&hr_qp->rq.lock); hr_qp->state = IB_QPS_RESET; + hr_qp->being_pushed = 0; hr_qp->ibqp.qp_type = init_attr->qp_type;

[v5,for-next,2/2] RDMA/hns: Delayed flush cqe process with workqueue

Commit Message

Patch