[for-next] RDMA/hns: Add FRMR support for hip08

Message ID 1538131439-59581-1-git-send-email-liuyixian@huawei.com (mailing list archive)
State Superseded
Series [for-next] RDMA/hns: Add FRMR support for hip08

Commit Message

Yixian Liu Sept. 28, 2018, 10:43 a.m. UTC
This patch adds fast register physical memory region (FRMR)
support for hip08.

Signed-off-by: Yixian Liu <liuyixian@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  10 +++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 107 ++++++++++++++++++++++++++--
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  22 ++++++
 drivers/infiniband/hw/hns/hns_roce_main.c   |   9 +++
 drivers/infiniband/hw/hns/hns_roce_mr.c     |  92 +++++++++++++++++++++---
 5 files changed, 227 insertions(+), 13 deletions(-)
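
For context, the verbs hooks added here (alloc_mr and map_mr_sg) plug into the
kernel's standard fast-registration flow. Below is a minimal, hypothetical
ULP-side sketch of that flow using only ib_verbs.h calls; pd, qp, sg, sg_nents
and max_num_sg are assumed to exist, and error handling is abbreviated:

struct ib_reg_wr reg_wr = {};
const struct ib_send_wr *bad_wr;
struct ib_mr *mr;
int n, ret;

/* lands in the new hns_roce_alloc_mr() */
mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, max_num_sg);
if (IS_ERR(mr))
        return PTR_ERR(mr);

/* lands in the new hns_roce_map_mr_sg(), which fills the PBL one
 * page at a time through hns_roce_set_page() */
n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
if (n < sg_nents)
        return -EINVAL;

/* hns_roce_v2_post_send() turns IB_WR_REG_MR into
 * HNS_ROCE_V2_WQE_OP_FAST_REG_PMR and builds the FRMR segment */
reg_wr.wr.opcode = IB_WR_REG_MR;
reg_wr.mr = mr;
reg_wr.key = mr->rkey;
reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
                IB_ACCESS_REMOTE_WRITE;

ret = ib_post_send(qp, &reg_wr.wr, &bad_wr);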

Comments

Jason Gunthorpe Oct. 3, 2018, 10:38 p.m. UTC | #1
On Fri, Sep 28, 2018 at 06:43:59PM +0800, Yixian Liu wrote:
> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
> index 8c5160e..788c921 100644
> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c
> @@ -220,6 +220,9 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
>  	props->max_pkeys = 1;
>  	props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
>  
> +	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR)
> +		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
> +

The IB spec has a feature set requirement before this bit can be set,
so I think the flags test should be broader.

Does hns now implement all the needed features? If I recall correctly,
send with invalidate and other things are included in this feature bit;
please check the spec and confirm.

Jason
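
A broader test along the lines Jason suggests might look like the sketch
below; HNS_ROCE_CAP_FLAG_SEND_WITH_INV is a hypothetical capability bit,
named here only for illustration:

/* hypothetical: advertise MEM_MGT_EXTENSIONS only when every feature
 * the IB spec folds into the bit is actually implemented */
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) &&
    (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SEND_WITH_INV))
        props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
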
Yixian Liu Oct. 5, 2018, 9:30 a.m. UTC | #2
On 2018/10/4 6:38, Jason Gunthorpe wrote:
> On Fri, Sep 28, 2018 at 06:43:59PM +0800, Yixian Liu wrote:
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
>> index 8c5160e..788c921 100644
>> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c
>> @@ -220,6 +220,9 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
>>  	props->max_pkeys = 1;
>>  	props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
>>  
>> +	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR)
>> +		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
>> +
> 
> The IB spec has a feature set requirement before this bit can be set,
> so I think the flags test should be broader.
> 
> Does hns now implement all the needed features? If I recall correctly,
> send with invalidate and other things are included in this feature bit;
> please check the spec and confirm.
> 
> Jason
> 

I have checked the IB spec against our hns hardware: send with invalidate is
supported, but the reserved lkey is not. Thus, I will remove this flag in the
next version.

Thanks
Sagi Grimberg Oct. 15, 2018, 8:38 a.m. UTC | #3
>> The IB spec has a feature set requirement before this bit can be set,
>> so I think the flags test should be broader.
>>
>> Does hns now implement all the needed features? If I recall correctly,
>> send with invalidate and other things are included in this feature bit;
>> please check the spec and confirm.
>>
>> Jason
>>
> 
> I have checked the IB spec against our hns hardware: send with invalidate is
> supported, but the reserved lkey is not. Thus, I will remove this flag in the
> next version.

I think we have a different capability for that one:
IB_DEVICE_LOCAL_DMA_LKEY

I think if you support frmr + remote invalidate you can set the base
memory mgmt extensions bit...
Yixian Liu Oct. 15, 2018, 10:36 a.m. UTC | #4
On 2018/10/15 16:38, Sagi Grimberg wrote:
> 
>>> The IB spec has a feature set requirement before this bit can be set,
>>> so I think the flags test should be broader.
>>>
>>> Does hns now implement all the needed features? If I recall correctly,
>>> send with invalidate and other things are included in this feature bit;
>>> please check the spec and confirm.
>>>
>>> Jason
>>>
>>
>> I have checked the IB spec against our hns hardware: send with invalidate is
>> supported, but the reserved lkey is not. Thus, I will remove this flag in the
>> next version.
> 
> I think we have a different capability for that one:
> IB_DEVICE_LOCAL_DMA_LKEY
> 
> I think if you support frmr + remote invalidate you can set the base
> memory mgmt extensions bit...
> 

Hi Sagi, thanks for the reminder.

Hi Jason, what's your opinion? Can I set the base memory mgmt extensions bit?
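
For reference, the two capability bits under discussion are consumed quite
differently by ULPs. A minimal sketch, using only fields from
include/rdma/ib_verbs.h, with device, pd and sge assumed to exist:

/* IB_DEVICE_MEM_MGT_EXTENSIONS gates the whole fast-registration
 * flow: ib_alloc_mr(IB_MR_TYPE_MEM_REG), ib_map_mr_sg(),
 * IB_WR_REG_MR/IB_WR_LOCAL_INV and send with invalidate. */
if (!(device->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
        return -EOPNOTSUPP;

/* IB_DEVICE_LOCAL_DMA_LKEY only advertises a reserved lkey; ULPs
 * rarely need to test it directly, because ib_alloc_pd() falls back
 * to a driver-created all-memory MR and always leaves a usable key
 * in pd->local_dma_lkey. */
sge.lkey = pd->local_dma_lkey;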

Patch

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index ffa9255..291e063 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -88,8 +88,11 @@ 
 #define BITMAP_RR				1
 
 #define MR_TYPE_MR				0x00
+#define MR_TYPE_FRMR				0x01
 #define MR_TYPE_DMA				0x03
 
+#define HNS_ROCE_FRMR_MAX_PA			512
+
 #define PKEY_ID					0xffff
 #define GUID_LEN				8
 #define NODE_DESC_SIZE				64
@@ -194,6 +197,7 @@  enum {
 	HNS_ROCE_CAP_FLAG_RECORD_DB		= BIT(3),
 	HNS_ROCE_CAP_FLAG_SQ_RECORD_DB		= BIT(4),
 	HNS_ROCE_CAP_FLAG_MW			= BIT(7),
+	HNS_ROCE_CAP_FLAG_FRMR                  = BIT(8),
 	HNS_ROCE_CAP_FLAG_ATOMIC		= BIT(10),
 };
 
@@ -308,6 +312,7 @@  struct hns_roce_mr {
 	u32			key; /* Key of MR */
 	u32			pd;   /* PD num of MR */
 	u32			access;/* Access permission of MR */
+	u32			npages;
 	int			enabled; /* MR's active status */
 	int			type;	/* MR's register type */
 	u64			*pbl_buf;/* MR's PBL space */
@@ -770,6 +775,7 @@  struct hns_roce_hw {
 				struct hns_roce_mr *mr, int flags, u32 pdn,
 				int mr_access_flags, u64 iova, u64 size,
 				void *mb_buf);
+	int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
 	int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
 	void (*write_cqc)(struct hns_roce_dev *hr_dev,
 			  struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
@@ -980,6 +986,10 @@  struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
 			   u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
 			   struct ib_udata *udata);
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+				u32 max_num_sg);
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+		       unsigned int *sg_offset);
 int hns_roce_dereg_mr(struct ib_mr *ibmr);
 int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
 		       struct hns_roce_cmd_mailbox *mailbox,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 0d7568e..a06511a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -54,6 +54,47 @@  static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
 	dseg->len  = cpu_to_le32(sg->length);
 }
 
+static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+			 struct hns_roce_wqe_frmr_seg *fseg,
+			 const struct ib_reg_wr *wr)
+{
+	struct hns_roce_mr *mr = to_hr_mr(wr->mr);
+
+	/* map the enum ib_access_flags bits from wr->access into the WQE */
+	roce_set_bit(rc_sq_wqe->byte_4,
+		     V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
+		     wr->access & IB_ACCESS_MW_BIND ? 1 : 0);
+	roce_set_bit(rc_sq_wqe->byte_4,
+		     V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
+		     wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
+	roce_set_bit(rc_sq_wqe->byte_4,
+		     V2_RC_FRMR_WQE_BYTE_4_RR_S,
+		     wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0);
+	roce_set_bit(rc_sq_wqe->byte_4,
+		     V2_RC_FRMR_WQE_BYTE_4_RW_S,
+		     wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
+	roce_set_bit(rc_sq_wqe->byte_4,
+		     V2_RC_FRMR_WQE_BYTE_4_LW_S,
+		     wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
+
+	/* msg_len/inv_key are reused to carry the PBL base address */
+	rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff);
+	rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32);
+
+	rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff);
+	rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32);
+	rc_sq_wqe->rkey = cpu_to_le32(wr->key);
+	rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
+
+	fseg->pbl_size = cpu_to_le32(mr->pbl_size);
+	roce_set_field(fseg->mode_buf_pg_sz,
+		       V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
+		       V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
+		       mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+	roce_set_bit(fseg->mode_buf_pg_sz,
+		     V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
+}
+
 static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
 			   const struct ib_atomic_wr *wr)
 {
@@ -192,6 +233,7 @@  static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 	struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
 	struct hns_roce_qp *qp = to_hr_qp(ibqp);
 	struct hns_roce_v2_wqe_data_seg *dseg;
+	struct hns_roce_wqe_frmr_seg *fseg;
 	struct device *dev = hr_dev->dev;
 	struct hns_roce_v2_db sq_db;
 	struct ib_qp_attr attr;
@@ -458,6 +500,11 @@  static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			case IB_WR_LOCAL_INV:
 				hr_op = HNS_ROCE_V2_WQE_OP_LOCAL_INV;
 				break;
+			case IB_WR_REG_MR:
+				hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
+				fseg = wqe;
+				set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr));
+				break;
 			case IB_WR_ATOMIC_CMP_AND_SWP:
 				hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
 				rc_sq_wqe->rkey =
@@ -499,10 +546,13 @@  static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			else
 				dseg = wqe;
 
-			ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
-						&sge_ind, bad_wr);
-			if (ret)
-				goto out;
+			if (wr->opcode != IB_WR_REG_MR) {
+				ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
+							wqe, &sge_ind, bad_wr);
+				if (ret)
+					goto out;
+			}
+
 			ind++;
 		} else {
 			dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
@@ -1261,7 +1311,8 @@  static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 				  HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
 
 	if (hr_dev->pci_dev->revision == 0x21)
-		caps->flags |= HNS_ROCE_CAP_FLAG_MW;
+		caps->flags |= HNS_ROCE_CAP_FLAG_MW |
+			       HNS_ROCE_CAP_FLAG_FRMR;
 
 	caps->pkey_table_len[0] = 1;
 	caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
@@ -1829,6 +1880,48 @@  static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
 	return 0;
 }
 
+static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
+{
+	struct hns_roce_v2_mpt_entry *mpt_entry;
+
+	mpt_entry = mb_buf;
+	memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+	roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
+		       V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
+	roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
+		       V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
+	roce_set_field(mpt_entry->byte_4_pd_hop_st,
+		       V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
+		       V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
+		       mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
+	roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
+		       V2_MPT_BYTE_4_PD_S, mr->pd);
+
+	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
+	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
+	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
+
+	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
+	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
+	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
+	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
+
+	mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
+
+	mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
+	roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
+		       V2_MPT_BYTE_48_PBL_BA_H_S,
+		       upper_32_bits(mr->pbl_ba >> 3));
+
+	roce_set_field(mpt_entry->byte_64_buf_pa1,
+		       V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
+		       V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
+		       mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+
+	return 0;
+}
+
 static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
 {
 	struct hns_roce_v2_mpt_entry *mpt_entry;
@@ -2805,6 +2898,9 @@  static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 	roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S,
 		     0);
 
+	roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
+	roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 0);
+
 	roce_set_field(qpc_mask->byte_176_msg_pktn,
 		       V2_QPC_BYTE_176_MSG_USE_PKTN_M,
 		       V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0);
@@ -5219,6 +5315,7 @@  static const struct hns_roce_hw hns_roce_hw_v2 = {
 	.set_mac = hns_roce_v2_set_mac,
 	.write_mtpt = hns_roce_v2_write_mtpt,
 	.rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt,
+	.frmr_write_mtpt = hns_roce_v2_frmr_write_mtpt,
 	.mw_write_mtpt = hns_roce_v2_mw_write_mtpt,
 	.write_cqc = hns_roce_v2_write_cqc,
 	.set_hem = hns_roce_v2_set_hem,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 7125426..18985c7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -887,6 +887,8 @@  struct hns_roce_v2_mpt_entry {
 #define V2_MPT_BYTE_8_MW_CNT_S 8
 #define V2_MPT_BYTE_8_MW_CNT_M GENMASK(31, 8)
 
+#define V2_MPT_BYTE_12_FRE_S 0
+
 #define V2_MPT_BYTE_12_PA_S 1
 
 #define V2_MPT_BYTE_12_MR_MW_S 4
@@ -1059,6 +1061,16 @@  struct hns_roce_v2_rc_send_wqe {
 
 #define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
 
+#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19
+
+#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20
+
+#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21
+
+#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22
+
+#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23
+
 #define	V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
 #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
 
@@ -1068,6 +1080,16 @@  struct hns_roce_v2_rc_send_wqe {
 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
 
+struct hns_roce_wqe_frmr_seg {
+	__le32	pbl_size;
+	__le32	mode_buf_pg_sz;
+};
+
+#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S	4
+#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M	GENMASK(7, 4)
+
+#define V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S 8
+
 struct hns_roce_v2_wqe_data_seg {
 	__le32    len;
 	__le32    lkey;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 8c5160e..788c921 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -220,6 +220,9 @@  static int hns_roce_query_device(struct ib_device *ib_dev,
 	props->max_pkeys = 1;
 	props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
 
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR)
+		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+
 	return 0;
 }
 
@@ -534,6 +537,12 @@  static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 					(1ULL << IB_USER_VERBS_CMD_DEALLOC_MW);
 	}
 
+	/* FRMR */
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
+		ib_dev->alloc_mr		= hns_roce_alloc_mr;
+		ib_dev->map_mr_sg		= hns_roce_map_mr_sg;
+	}
+
 	/* OTHERS */
 	ib_dev->get_port_immutable	= hns_roce_port_immutable;
 	ib_dev->disassociate_ucontext	= hns_roce_disassociate_ucontext;
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 0613c11..521ad2a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -329,7 +329,7 @@  static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
 	u64 bt_idx;
 	u64 size;
 
-	mhop_num = hr_dev->caps.pbl_hop_num;
+	mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
 	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
 	pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
 
@@ -351,7 +351,7 @@  static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
 
 		mr->pbl_size = npages;
 		mr->pbl_ba = mr->pbl_dma_addr;
-		mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
+		mr->pbl_hop_num = mhop_num;
 		mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
 		mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
 		return 0;
@@ -511,7 +511,6 @@  static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
 	mr->key = hw_index_to_key(index);	/* MR key */
 
 	if (size == ~0ull) {
-		mr->type = MR_TYPE_DMA;
 		mr->pbl_buf = NULL;
 		mr->pbl_dma_addr = 0;
 		/* PBL multi-hop addressing parameters */
@@ -522,7 +521,6 @@  static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
 		mr->pbl_l1_dma_addr = NULL;
 		mr->pbl_l0_dma_addr = 0;
 	} else {
-		mr->type = MR_TYPE_MR;
 		if (!hr_dev->caps.pbl_hop_num) {
 			mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
 							 &(mr->pbl_dma_addr),
@@ -548,9 +546,9 @@  static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
 	u32 mhop_num;
 	u64 bt_idx;
 
-	npages = ib_umem_page_count(mr->umem);
+	npages = mr->pbl_size;
 	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
-	mhop_num = hr_dev->caps.pbl_hop_num;
+	mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
 
 	if (mhop_num == HNS_ROCE_HOP_NUM_0)
 		return;
@@ -636,7 +634,8 @@  static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
 	}
 
 	if (mr->size != ~0ULL) {
-		npages = ib_umem_page_count(mr->umem);
+		if (mr->type == MR_TYPE_MR)
+			npages = ib_umem_page_count(mr->umem);
 
 		if (!hr_dev->caps.pbl_hop_num)
 			dma_free_coherent(dev, (unsigned int)(npages * 8),
@@ -674,7 +673,10 @@  static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
 		goto err_table;
 	}
 
-	ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+	if (mr->type != MR_TYPE_FRMR)
+		ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+	else
+		ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
 	if (ret) {
 		dev_err(dev, "Write mtpt fail!\n");
 		goto err_page;
@@ -855,6 +857,8 @@  struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
 	if (mr == NULL)
 		return  ERR_PTR(-ENOMEM);
 
+	mr->type = MR_TYPE_DMA;
+
 	/* Allocate memory region key */
 	ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
 				~0ULL, acc, 0, mr);
@@ -1031,6 +1035,8 @@  struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		}
 	}
 
+	mr->type = MR_TYPE_MR;
+
 	ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
 				access_flags, n, mr);
 	if (ret)
@@ -1202,6 +1208,76 @@  int hns_roce_dereg_mr(struct ib_mr *ibmr)
 	return ret;
 }
 
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+				u32 max_num_sg)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+	struct device *dev = hr_dev->dev;
+	struct hns_roce_mr *mr;
+	u64 length;
+	u32 page_size;
+	int ret;
+
+	page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
+	length = max_num_sg * page_size;
+
+	if (mr_type != IB_MR_TYPE_MEM_REG)
+		return ERR_PTR(-EINVAL);
+
+	if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
+		dev_err(dev, "max_num_sg larger than %d\n",
+			HNS_ROCE_FRMR_MAX_PA);
+		return ERR_PTR(-EINVAL);
+	}
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	mr->type = MR_TYPE_FRMR;
+
+	/* Allocate memory region key */
+	ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
+				0, max_num_sg, mr);
+	if (ret)
+		goto err_free;
+
+	ret = hns_roce_mr_enable(hr_dev, mr);
+	if (ret)
+		goto err_mr;
+
+	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+	mr->umem = NULL;
+
+	return &mr->ibmr;
+
+err_mr:
+	hns_roce_mr_free(to_hr_dev(pd->device), mr);
+
+err_free:
+	kfree(mr);
+	return ERR_PTR(ret);
+}
+
+static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
+{
+	struct hns_roce_mr *mr = to_hr_mr(ibmr);
+
+	mr->pbl_buf[mr->npages++] = cpu_to_le64(addr);
+
+	return 0;
+}
+
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+		       unsigned int *sg_offset)
+{
+	struct hns_roce_mr *mr = to_hr_mr(ibmr);
+
+	mr->npages = 0;
+
+	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
+}
+
 static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
 			     struct hns_roce_mw *mw)
 {