diff mbox series

[v2,for-next] RDMA/hns: Add support for XRC on HIP09

Message ID 1614826558-35423-1-git-send-email-liweihang@huawei.com (mailing list archive)
State Accepted
Delegated to: Jason Gunthorpe
Headers show
Series [v2,for-next] RDMA/hns: Add support for XRC on HIP09 | expand

Commit Message

Weihang Li March 4, 2021, 2:55 a.m. UTC
From: Wenpeng Liang <liangwenpeng@huawei.com>

The HIP09 supports XRC transport service, it greatly saves the number of
QPs required to connect all processes in a large cluster.

Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
Changes since v1:
- Change all type of xrcdn-related variables/fields to u32.
- Link: https://patchwork.kernel.org/project/linux-rdma/patch/1614689298-13601-1-git-send-email-liweihang@huawei.com/

 drivers/infiniband/hw/hns/hns_roce_alloc.c  |   3 +
 drivers/infiniband/hw/hns/hns_roce_device.h |  25 +++++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 147 +++++++++++++++++++---------
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |   2 +
 drivers/infiniband/hw/hns/hns_roce_main.c   |  32 +++++-
 drivers/infiniband/hw/hns/hns_roce_pd.c     |  51 ++++++++++
 drivers/infiniband/hw/hns/hns_roce_qp.c     |  63 ++++++++----
 drivers/infiniband/hw/hns/hns_roce_srq.c    |   3 +
 include/uapi/rdma/hns-abi.h                 |   2 +
 9 files changed, 258 insertions(+), 70 deletions(-)

Comments

Jason Gunthorpe March 12, 2021, 12:02 a.m. UTC | #1
On Thu, Mar 04, 2021 at 10:55:58AM +0800, Weihang Li wrote:
> From: Wenpeng Liang <liangwenpeng@huawei.com>
> 
> The HIP09 supports XRC transport service, it greatly saves the number of
> QPs required to connect all processes in a large cluster.
> 
> Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
> Signed-off-by: Weihang Li <liweihang@huawei.com>
> ---
> Changes since v1:
> - Change all type of xrcdn-related variables/fields to u32.
> - Link: https://patchwork.kernel.org/project/linux-rdma/patch/1614689298-13601-1-git-send-email-liweihang@huawei.com/
> 
>  drivers/infiniband/hw/hns/hns_roce_alloc.c  |   3 +
>  drivers/infiniband/hw/hns/hns_roce_device.h |  25 +++++
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 147 +++++++++++++++++++---------
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |   2 +
>  drivers/infiniband/hw/hns/hns_roce_main.c   |  32 +++++-
>  drivers/infiniband/hw/hns/hns_roce_pd.c     |  51 ++++++++++
>  drivers/infiniband/hw/hns/hns_roce_qp.c     |  63 ++++++++----
>  drivers/infiniband/hw/hns/hns_roce_srq.c    |   3 +
>  include/uapi/rdma/hns-abi.h                 |   2 +
>  9 files changed, 258 insertions(+), 70 deletions(-)

Applied to for-next, thanks

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 4bcaaa05..5d389ed 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -304,6 +304,9 @@  int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
 
 void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
 {
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+		hns_roce_cleanup_xrcd_table(hr_dev);
+
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
 		hns_roce_cleanup_srq_table(hr_dev);
 	hns_roce_cleanup_qp_table(hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 3d6b7a2d..1a95fb7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -137,6 +137,7 @@  enum {
 	SERV_TYPE_UC,
 	SERV_TYPE_RD,
 	SERV_TYPE_UD,
+	SERV_TYPE_XRC = 5,
 };
 
 enum hns_roce_qp_state {
@@ -168,6 +169,8 @@  enum hns_roce_event {
 	HNS_ROCE_EVENT_TYPE_DB_OVERFLOW               = 0x12,
 	HNS_ROCE_EVENT_TYPE_MB                        = 0x13,
 	HNS_ROCE_EVENT_TYPE_FLR			      = 0x15,
+	HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION	      = 0x16,
+	HNS_ROCE_EVENT_TYPE_INVALID_XRCETH	      = 0x17,
 };
 
 #define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
@@ -179,6 +182,7 @@  enum {
 	HNS_ROCE_CAP_FLAG_RECORD_DB		= BIT(3),
 	HNS_ROCE_CAP_FLAG_SQ_RECORD_DB		= BIT(4),
 	HNS_ROCE_CAP_FLAG_SRQ			= BIT(5),
+	HNS_ROCE_CAP_FLAG_XRC			= BIT(6),
 	HNS_ROCE_CAP_FLAG_MW			= BIT(7),
 	HNS_ROCE_CAP_FLAG_FRMR                  = BIT(8),
 	HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL		= BIT(9),
@@ -244,6 +248,11 @@  struct hns_roce_pd {
 	unsigned long		pdn;
 };
 
+struct hns_roce_xrcd {
+	struct ib_xrcd ibxrcd;
+	u32 xrcdn;
+};
+
 struct hns_roce_bitmap {
 	/* Bitmap Traversal last a bit which is 1 */
 	unsigned long		last;
@@ -467,6 +476,7 @@  struct hns_roce_srq {
 	u32			rsv_sge;
 	int			wqe_shift;
 	u32			cqn;
+	u32			xrcdn;
 	void __iomem		*db_reg_l;
 
 	atomic_t		refcount;
@@ -640,6 +650,8 @@  struct hns_roce_qp {
 					 enum hns_roce_event event_type);
 	unsigned long		qpn;
 
+	u32			xrcdn;
+
 	atomic_t		refcount;
 	struct completion	free;
 
@@ -766,6 +778,8 @@  struct hns_roce_caps {
 	int		reserved_uars;
 	int		num_pds;
 	int		reserved_pds;
+	u32		num_xrcds;
+	u32		reserved_xrcds;
 	u32		mtt_entry_sz;
 	u32		cqe_sz;
 	u32		page_size_cap;
@@ -963,6 +977,7 @@  struct hns_roce_dev {
 
 	struct hns_roce_cmdq	cmd;
 	struct hns_roce_bitmap    pd_bitmap;
+	struct hns_roce_bitmap xrcd_bitmap;
 	struct hns_roce_uar_table uar_table;
 	struct hns_roce_mr_table  mr_table;
 	struct hns_roce_cq_table  cq_table;
@@ -1004,6 +1019,11 @@  static inline struct hns_roce_pd *to_hr_pd(struct ib_pd *ibpd)
 	return container_of(ibpd, struct hns_roce_pd, ibpd);
 }
 
+static inline struct hns_roce_xrcd *to_hr_xrcd(struct ib_xrcd *ibxrcd)
+{
+	return container_of(ibxrcd, struct hns_roce_xrcd, ibxrcd);
+}
+
 static inline struct hns_roce_ah *to_hr_ah(struct ib_ah *ibah)
 {
 	return container_of(ibah, struct hns_roce_ah, ibah);
@@ -1136,6 +1156,7 @@  int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
 void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
 int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
 int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
+int hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev);
 
 void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev);
@@ -1143,6 +1164,7 @@  void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
 void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev);
+void hns_roce_cleanup_xrcd_table(struct hns_roce_dev *hr_dev);
 
 int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj);
 void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
@@ -1207,6 +1229,9 @@  int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
 			struct ib_udata *udata);
 int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
 
+int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
+int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
+
 struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
 				 struct ib_qp_init_attr *init_attr,
 				 struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index c3934ab..593acf1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1829,6 +1829,7 @@  static void set_default_caps(struct hns_roce_dev *hr_dev)
 	caps->num_srqwqe_segs	= HNS_ROCE_V2_MAX_SRQWQE_SEGS;
 	caps->num_idx_segs	= HNS_ROCE_V2_MAX_IDX_SEGS;
 	caps->num_pds		= HNS_ROCE_V2_MAX_PD_NUM;
+	caps->num_xrcds		= HNS_ROCE_V2_MAX_XRCD_NUM;
 	caps->max_qp_init_rdma	= HNS_ROCE_V2_MAX_QP_INIT_RDMA;
 	caps->max_qp_dest_rdma	= HNS_ROCE_V2_MAX_QP_DEST_RDMA;
 	caps->max_sq_desc_sz	= HNS_ROCE_V2_MAX_SQ_DESC_SZ;
@@ -1846,6 +1847,7 @@  static void set_default_caps(struct hns_roce_dev *hr_dev)
 	caps->page_size_cap	= HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
 	caps->reserved_lkey	= 0;
 	caps->reserved_pds	= 0;
+	caps->reserved_xrcds	= HNS_ROCE_V2_RSV_XRCD_NUM;
 	caps->reserved_mrws	= 1;
 	caps->reserved_uars	= 0;
 	caps->reserved_cqs	= 0;
@@ -1900,7 +1902,7 @@  static void set_default_caps(struct hns_roce_dev *hr_dev)
 
 	caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW |
 		       HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR |
-		       HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
+		       HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL | HNS_ROCE_CAP_FLAG_XRC;
 
 	caps->num_qpc_timer	  = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
 	caps->qpc_timer_entry_sz  = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ;
@@ -2131,6 +2133,8 @@  static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
 	caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
 	caps->ceqe_size = HNS_ROCE_CEQE_SIZE;
 	caps->aeqe_size = HNS_ROCE_AEQE_SIZE;
+	caps->num_xrcds = HNS_ROCE_V2_MAX_XRCD_NUM;
+	caps->reserved_xrcds = HNS_ROCE_V2_RSV_XRCD_NUM;
 	caps->mtt_ba_pg_sz = 0;
 	caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
 	caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
@@ -3909,6 +3913,16 @@  static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
 		       ilog2(hr_qp->rq.wqe_cnt));
 }
 
+static inline int get_cqn(struct ib_cq *ib_cq)
+{
+	return ib_cq ? to_hr_cq(ib_cq)->cqn : 0;
+}
+
+static inline int get_pdn(struct ib_pd *ib_pd)
+{
+	return ib_pd ? to_hr_pd(ib_pd)->pdn : 0;
+}
+
 static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 				    const struct ib_qp_attr *attr,
 				    int attr_mask,
@@ -3925,13 +3939,13 @@  static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 	 * 0 at the same time, else set them to 0x1.
 	 */
 	roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
-		       V2_QPC_BYTE_4_TST_S, to_hr_qp_type(hr_qp->ibqp.qp_type));
+		       V2_QPC_BYTE_4_TST_S, to_hr_qp_type(ibqp->qp_type));
 
 	roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
 		       V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn);
 
 	roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
-		       V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn);
+		       V2_QPC_BYTE_16_PD_S, get_pdn(ibqp->pd));
 
 	roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M,
 		       V2_QPC_BYTE_20_RQWS_S, ilog2(hr_qp->rq.max_gs));
@@ -3942,6 +3956,9 @@  static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 	roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
 		       V2_QPC_BYTE_24_VLAN_ID_S, 0xfff);
 
+	if (ibqp->qp_type == IB_QPT_XRC_TGT)
+		context->qkey_xrcd = cpu_to_le32(hr_qp->xrcdn);
+
 	if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
 		roce_set_bit(context->byte_68_rq_db,
 			     V2_QPC_BYTE_68_RQ_RECORD_EN_S, 1);
@@ -3956,19 +3973,20 @@  static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 		    (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) ? 1 : 0);
 
 	roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
-		       V2_QPC_BYTE_80_RX_CQN_S, to_hr_cq(ibqp->recv_cq)->cqn);
+		       V2_QPC_BYTE_80_RX_CQN_S, get_cqn(ibqp->recv_cq));
+
 	if (ibqp->srq) {
+		roce_set_bit(context->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_SRQ_EN_S, 1);
 		roce_set_field(context->byte_76_srqn_op_en,
 			       V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S,
 			       to_hr_srq(ibqp->srq)->srqn);
-		roce_set_bit(context->byte_76_srqn_op_en,
-			     V2_QPC_BYTE_76_SRQ_EN_S, 1);
 	}
 
 	roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
 
 	roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
-		       V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn);
+		       V2_QPC_BYTE_252_TX_CQN_S, get_cqn(ibqp->send_cq));
 
 	if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ)
 		return;
@@ -3991,22 +4009,28 @@  static void modify_qp_init_to_init(struct ib_qp *ibqp,
 	 * 0 at the same time, else set them to 0x1.
 	 */
 	roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
-		       V2_QPC_BYTE_4_TST_S, to_hr_qp_type(hr_qp->ibqp.qp_type));
+		       V2_QPC_BYTE_4_TST_S, to_hr_qp_type(ibqp->qp_type));
 	roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
 		       V2_QPC_BYTE_4_TST_S, 0);
 
+	if (ibqp->qp_type == IB_QPT_XRC_TGT) {
+		context->qkey_xrcd = cpu_to_le32(hr_qp->xrcdn);
+		qpc_mask->qkey_xrcd = 0;
+	}
+
 	roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
-		       V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn);
+		       V2_QPC_BYTE_16_PD_S, get_pdn(ibqp->pd));
+
 	roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
 		       V2_QPC_BYTE_16_PD_S, 0);
 
 	roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
-		       V2_QPC_BYTE_80_RX_CQN_S, to_hr_cq(ibqp->recv_cq)->cqn);
+		       V2_QPC_BYTE_80_RX_CQN_S, get_cqn(ibqp->recv_cq));
 	roce_set_field(qpc_mask->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
 		       V2_QPC_BYTE_80_RX_CQN_S, 0);
 
 	roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
-		       V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn);
+		       V2_QPC_BYTE_252_TX_CQN_S, get_cqn(ibqp->send_cq));
 	roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
 		       V2_QPC_BYTE_252_TX_CQN_S, 0);
 
@@ -4685,7 +4709,6 @@  static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
 			       V2_QPC_BYTE_244_RNR_CNT_S, 0);
 	}
 
-	/* RC&UC&UD required attr */
 	if (attr_mask & IB_QP_SQ_PSN) {
 		roce_set_field(context->byte_172_sq_psn,
 			       V2_QPC_BYTE_172_SQ_CUR_PSN_M,
@@ -4763,7 +4786,6 @@  static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
 			       V2_QPC_BYTE_80_MIN_RNR_TIME_S, 0);
 	}
 
-	/* RC&UC required attr */
 	if (attr_mask & IB_QP_RQ_PSN) {
 		roce_set_field(context->byte_108_rx_reqepsn,
 			       V2_QPC_BYTE_108_RX_REQ_EPSN_M,
@@ -4806,6 +4828,29 @@  static void hns_roce_v2_record_opt_fields(struct ib_qp *ibqp,
 	}
 }
 
+static void clear_qp(struct hns_roce_qp *hr_qp)
+{
+	struct ib_qp *ibqp = &hr_qp->ibqp;
+
+	if (ibqp->send_cq)
+		hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq),
+				     hr_qp->qpn, NULL);
+
+	if (ibqp->recv_cq  && ibqp->recv_cq != ibqp->send_cq)
+		hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq),
+				     hr_qp->qpn, ibqp->srq ?
+				     to_hr_srq(ibqp->srq) : NULL);
+
+	if (hr_qp->rq.wqe_cnt)
+		*hr_qp->rdb.db_record = 0;
+
+	hr_qp->rq.head = 0;
+	hr_qp->rq.tail = 0;
+	hr_qp->sq.head = 0;
+	hr_qp->sq.tail = 0;
+	hr_qp->next_sge = 0;
+}
+
 static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 				 const struct ib_qp_attr *attr,
 				 int attr_mask, enum ib_qp_state cur_state,
@@ -4840,19 +4885,23 @@  static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 
 	/* When QP state is err, SQ and RQ WQE should be flushed */
 	if (new_state == IB_QPS_ERR) {
-		spin_lock_irqsave(&hr_qp->sq.lock, sq_flag);
-		hr_qp->state = IB_QPS_ERR;
-		roce_set_field(context->byte_160_sq_ci_pi,
-			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
-			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
-			       hr_qp->sq.head);
-		roce_set_field(qpc_mask->byte_160_sq_ci_pi,
-			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
-			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
-		spin_unlock_irqrestore(&hr_qp->sq.lock, sq_flag);
-
-		if (!ibqp->srq) {
+		if (ibqp->qp_type != IB_QPT_XRC_TGT) {
+			spin_lock_irqsave(&hr_qp->sq.lock, sq_flag);
+			hr_qp->state = IB_QPS_ERR;
+			roce_set_field(context->byte_160_sq_ci_pi,
+				       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+				       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
+				       hr_qp->sq.head);
+			roce_set_field(qpc_mask->byte_160_sq_ci_pi,
+				       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+				       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
+			spin_unlock_irqrestore(&hr_qp->sq.lock, sq_flag);
+		}
+
+		if (!ibqp->srq && ibqp->qp_type != IB_QPT_XRC_INI &&
+		    ibqp->qp_type != IB_QPT_XRC_TGT) {
 			spin_lock_irqsave(&hr_qp->rq.lock, rq_flag);
+			hr_qp->state = IB_QPS_ERR;
 			roce_set_field(context->byte_84_rq_ci_pi,
 			       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
 			       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
@@ -4871,7 +4920,8 @@  static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 		goto out;
 
 	roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S,
-		     ibqp->srq ? 1 : 0);
+		     ((to_hr_qp_type(hr_qp->ibqp.qp_type) == SERV_TYPE_XRC) ||
+		     ibqp->srq) ? 1 : 0);
 	roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
 		     V2_QPC_BYTE_108_INV_CREDIT_S, 0);
 
@@ -4892,21 +4942,8 @@  static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 
 	hns_roce_v2_record_opt_fields(ibqp, attr, attr_mask);
 
-	if (new_state == IB_QPS_RESET && !ibqp->uobject) {
-		hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
-				     ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
-		if (ibqp->send_cq != ibqp->recv_cq)
-			hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq),
-					     hr_qp->qpn, NULL);
-
-		hr_qp->rq.head = 0;
-		hr_qp->rq.tail = 0;
-		hr_qp->sq.head = 0;
-		hr_qp->sq.tail = 0;
-		hr_qp->next_sge = 0;
-		if (hr_qp->rq.wqe_cnt)
-			*hr_qp->rdb.db_record = 0;
-	}
+	if (new_state == IB_QPS_RESET && !ibqp->uobject)
+		clear_qp(hr_qp);
 
 out:
 	return ret;
@@ -5017,7 +5054,9 @@  static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 				    V2_QPC_BYTE_76_ATE_S)) << V2_QP_ATE_S);
 
 	if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
-	    hr_qp->ibqp.qp_type == IB_QPT_UC) {
+	    hr_qp->ibqp.qp_type == IB_QPT_UC ||
+	    hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+	    hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT) {
 		struct ib_global_route *grh =
 				rdma_ah_retrieve_grh(&qp_attr->ah_attr);
 
@@ -5049,6 +5088,7 @@  static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context.byte_140_raq,
 						     V2_QPC_BYTE_140_RR_MAX_M,
 						     V2_QPC_BYTE_140_RR_MAX_S);
+
 	qp_attr->min_rnr_timer = (u8)roce_get_field(context.byte_80_rnr_rx_cqn,
 						 V2_QPC_BYTE_80_MIN_RNR_TIME_M,
 						 V2_QPC_BYTE_80_MIN_RNR_TIME_S);
@@ -5083,6 +5123,15 @@  static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	return ret;
 }
 
+static inline int modify_qp_is_ok(struct hns_roce_qp *hr_qp)
+{
+	return ((hr_qp->ibqp.qp_type == IB_QPT_RC ||
+		 hr_qp->ibqp.qp_type == IB_QPT_UD ||
+		 hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+		 hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT) &&
+		hr_qp->state != IB_QPS_RESET);
+}
+
 static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
 					 struct hns_roce_qp *hr_qp,
 					 struct ib_udata *udata)
@@ -5092,9 +5141,7 @@  static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
 	unsigned long flags;
 	int ret = 0;
 
-	if ((hr_qp->ibqp.qp_type == IB_QPT_RC ||
-	     hr_qp->ibqp.qp_type == IB_QPT_UD) &&
-	   hr_qp->state != IB_QPS_RESET) {
+	if (modify_qp_is_ok(hr_qp)) {
 		/* Modify qp to reset before destroying qp */
 		ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
 					    hr_qp->state, IB_QPS_RESET);
@@ -5275,7 +5322,7 @@  static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
 	hr_reg_write(ctx, SRQC_SRQ_ST, 1);
 	hr_reg_write(ctx, SRQC_PD, to_hr_pd(srq->ibsrq.pd)->pdn);
 	hr_reg_write(ctx, SRQC_SRQN, srq->srqn);
-	hr_reg_write(ctx, SRQC_XRCD, 0);
+	hr_reg_write(ctx, SRQC_XRCD, srq->xrcdn);
 	hr_reg_write(ctx, SRQC_XRC_CQN, srq->cqn);
 	hr_reg_write(ctx, SRQC_SHIFT, ilog2(srq->wqe_cnt));
 	hr_reg_write(ctx, SRQC_RQWS,
@@ -5479,6 +5526,12 @@  static void hns_roce_irq_work_handle(struct work_struct *work)
 	case HNS_ROCE_EVENT_TYPE_FLR:
 		ibdev_warn(ibdev, "Function level reset.\n");
 		break;
+	case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+		ibdev_err(ibdev, "xrc domain violation error.\n");
+		break;
+	case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+		ibdev_err(ibdev, "invalid xrceth error.\n");
+		break;
 	default:
 		break;
 	}
@@ -5579,6 +5632,8 @@  static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 		case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
 		case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
 		case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+		case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+		case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
 			hns_roce_qp_event(hr_dev, queue_num, event_type);
 			break;
 		case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 39621fb..ffdae15 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -74,6 +74,8 @@ 
 #define HNS_ROCE_V2_MAX_SRQWQE_SEGS		0x1000000
 #define HNS_ROCE_V2_MAX_IDX_SEGS		0x1000000
 #define HNS_ROCE_V2_MAX_PD_NUM			0x1000000
+#define HNS_ROCE_V2_MAX_XRCD_NUM		0x1000000
+#define HNS_ROCE_V2_RSV_XRCD_NUM		0
 #define HNS_ROCE_V2_MAX_QP_INIT_RDMA		128
 #define HNS_ROCE_V2_MAX_QP_DEST_RDMA		128
 #define HNS_ROCE_V2_MAX_SQ_DESC_SZ		64
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index c9c0836..1a747f7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -207,6 +207,9 @@  static int hns_roce_query_device(struct ib_device *ib_dev,
 		props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA;
 	}
 
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+		props->device_cap_flags |= IB_DEVICE_XRC;
+
 	return 0;
 }
 
@@ -300,6 +303,7 @@  static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 		return -EAGAIN;
 
 	resp.qp_tab_size = hr_dev->caps.num_qps;
+	resp.srq_tab_size = hr_dev->caps.num_srqs;
 
 	ret = hns_roce_uar_alloc(hr_dev, &context->uar);
 	if (ret)
@@ -461,6 +465,13 @@  static const struct ib_device_ops hns_roce_dev_srq_ops = {
 	INIT_RDMA_OBJ_SIZE(ib_srq, hns_roce_srq, ibsrq),
 };
 
+static const struct ib_device_ops hns_roce_dev_xrcd_ops = {
+	.alloc_xrcd = hns_roce_alloc_xrcd,
+	.dealloc_xrcd = hns_roce_dealloc_xrcd,
+
+	INIT_RDMA_OBJ_SIZE(ib_xrcd, hns_roce_xrcd, ibxrcd),
+};
+
 static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 {
 	int ret;
@@ -484,20 +495,20 @@  static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR)
 		ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops);
 
-	/* MW */
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW)
 		ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops);
 
-	/* FRMR */
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR)
 		ib_set_device_ops(ib_dev, &hns_roce_dev_frmr_ops);
 
-	/* SRQ */
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
 		ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops);
 		ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops);
 	}
 
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+		ib_set_device_ops(ib_dev, &hns_roce_dev_xrcd_ops);
+
 	ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
 	ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
 	for (i = 0; i < hr_dev->caps.num_ports; i++) {
@@ -727,10 +738,19 @@  static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
 		goto err_uar_alloc_free;
 	}
 
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC) {
+		ret = hns_roce_init_xrcd_table(hr_dev);
+		if (ret) {
+			dev_err(dev, "failed to init xrcd table, ret = %d.\n",
+				ret);
+			goto err_pd_table_free;
+		}
+	}
+
 	ret = hns_roce_init_mr_table(hr_dev);
 	if (ret) {
 		dev_err(dev, "Failed to init memory region table.\n");
-		goto err_pd_table_free;
+		goto err_xrcd_table_free;
 	}
 
 	hns_roce_init_cq_table(hr_dev);
@@ -759,6 +779,10 @@  static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
 	hns_roce_cleanup_cq_table(hr_dev);
 	hns_roce_cleanup_mr_table(hr_dev);
 
+err_xrcd_table_free:
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+		hns_roce_cleanup_xrcd_table(hr_dev);
+
 err_pd_table_free:
 	hns_roce_cleanup_pd_table(hr_dev);
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index cca818d..3ca51ce 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -137,3 +137,54 @@  void hns_roce_cleanup_uar_table(struct hns_roce_dev *hr_dev)
 {
 	hns_roce_bitmap_cleanup(&hr_dev->uar_table.bitmap);
 }
+
+static int hns_roce_xrcd_alloc(struct hns_roce_dev *hr_dev, u32 *xrcdn)
+{
+	return hns_roce_bitmap_alloc(&hr_dev->xrcd_bitmap,
+				     (unsigned long *)xrcdn);
+}
+
+static void hns_roce_xrcd_free(struct hns_roce_dev *hr_dev,
+			       u32 xrcdn)
+{
+	hns_roce_bitmap_free(&hr_dev->xrcd_bitmap, xrcdn, BITMAP_NO_RR);
+}
+
+int hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev)
+{
+	return hns_roce_bitmap_init(&hr_dev->xrcd_bitmap,
+				    hr_dev->caps.num_xrcds,
+				    hr_dev->caps.num_xrcds - 1,
+				    hr_dev->caps.reserved_xrcds, 0);
+}
+
+void hns_roce_cleanup_xrcd_table(struct hns_roce_dev *hr_dev)
+{
+	hns_roce_bitmap_cleanup(&hr_dev->xrcd_bitmap);
+}
+
+int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ib_xrcd->device);
+	struct hns_roce_xrcd *xrcd = to_hr_xrcd(ib_xrcd);
+	int ret;
+
+	if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
+		return -EINVAL;
+
+	ret = hns_roce_xrcd_alloc(hr_dev, &xrcd->xrcdn);
+	if (ret) {
+		dev_err(hr_dev->dev, "failed to alloc xrcdn, ret = %d.\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
+{
+	hns_roce_xrcd_free(to_hr_dev(ib_xrcd->device),
+			   to_hr_xrcd(ib_xrcd)->xrcdn);
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 004aca9..9169225 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -98,7 +98,9 @@  void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
 	if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 &&
 	    (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR ||
 	     event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR ||
-	     event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR)) {
+	     event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR ||
+	     event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION ||
+	     event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH)) {
 		qp->state = IB_QPS_ERR;
 		if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag))
 			init_flush_work(hr_dev, qp);
@@ -142,6 +144,8 @@  static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
 			event.event = IB_EVENT_QP_REQ_ERR;
 			break;
 		case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+		case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+		case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
 			event.event = IB_EVENT_QP_ACCESS_ERR;
 			break;
 		default:
@@ -366,8 +370,13 @@  void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 	unsigned long flags;
 
 	list_del(&hr_qp->node);
-	list_del(&hr_qp->sq_node);
-	list_del(&hr_qp->rq_node);
+
+	if (hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
+		list_del(&hr_qp->sq_node);
+
+	if (hr_qp->ibqp.qp_type != IB_QPT_XRC_INI &&
+	    hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
+		list_del(&hr_qp->rq_node);
 
 	xa_lock_irqsave(xa, flags);
 	__xa_erase(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1));
@@ -1100,11 +1109,16 @@  static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type,
 			 bool is_user)
 {
 	switch (type) {
+	case IB_QPT_XRC_INI:
+	case IB_QPT_XRC_TGT:
+		if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
+			goto out;
+		break;
 	case IB_QPT_UD:
 		if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 &&
 		    is_user)
 			goto out;
-		fallthrough;
+		break;
 	case IB_QPT_RC:
 	case IB_QPT_GSI:
 		break;
@@ -1124,8 +1138,8 @@  struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 				 struct ib_qp_init_attr *init_attr,
 				 struct ib_udata *udata)
 {
-	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
-	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct ib_device *ibdev = pd ? pd->device : init_attr->xrcd->device;
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
 	struct hns_roce_qp *hr_qp;
 	int ret;
 
@@ -1137,6 +1151,15 @@  struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 	if (!hr_qp)
 		return ERR_PTR(-ENOMEM);
 
+	if (init_attr->qp_type == IB_QPT_XRC_INI)
+		init_attr->recv_cq = NULL;
+
+	if (init_attr->qp_type == IB_QPT_XRC_TGT) {
+		hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn;
+		init_attr->recv_cq = NULL;
+		init_attr->send_cq = NULL;
+	}
+
 	if (init_attr->qp_type == IB_QPT_GSI) {
 		hr_qp->port = init_attr->port_num - 1;
 		hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
@@ -1156,20 +1179,20 @@  struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 
 int to_hr_qp_type(int qp_type)
 {
-	int transport_type;
-
-	if (qp_type == IB_QPT_RC)
-		transport_type = SERV_TYPE_RC;
-	else if (qp_type == IB_QPT_UC)
-		transport_type = SERV_TYPE_UC;
-	else if (qp_type == IB_QPT_UD)
-		transport_type = SERV_TYPE_UD;
-	else if (qp_type == IB_QPT_GSI)
-		transport_type = SERV_TYPE_UD;
-	else
-		transport_type = -1;
-
-	return transport_type;
+	switch (qp_type) {
+	case IB_QPT_RC:
+		return SERV_TYPE_RC;
+	case IB_QPT_UC:
+		return SERV_TYPE_UC;
+	case IB_QPT_UD:
+	case IB_QPT_GSI:
+		return SERV_TYPE_UD;
+	case IB_QPT_XRC_INI:
+	case IB_QPT_XRC_TGT:
+		return SERV_TYPE_XRC;
+	default:
+		return -1;
+	}
 }
 
 static int check_mtu_validate(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index d5a6de0..f71459a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -314,6 +314,9 @@  static void set_srq_ext_param(struct hns_roce_srq *srq,
 {
 	srq->cqn = ib_srq_has_cq(init_attr->srq_type) ?
 		   to_hr_cq(init_attr->ext.cq)->cqn : 0;
+
+	srq->xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
+		     to_hr_xrcd(init_attr->ext.xrc.xrcd)->xrcdn : 0;
 }
 
 static int set_srq_param(struct hns_roce_srq *srq,
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 90b739d..42b1776 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -86,6 +86,8 @@  struct hns_roce_ib_create_qp_resp {
 struct hns_roce_ib_alloc_ucontext_resp {
 	__u32	qp_tab_size;
 	__u32	cqe_size;
+	__u32	srq_tab_size;
+	__u32	reserved;
 };
 
 struct hns_roce_ib_alloc_pd_resp {