diff mbox series

[for-rc,v6] RDMA/rxe: Fix pd ref counting in rxe mr verbs.

Message ID 20220901200426.3236-1-rpearsonhpe@gmail.com (mailing list archive)
State Changes Requested
Headers show
Series [for-rc,v6] RDMA/rxe: Fix pd ref counting in rxe mr verbs. | expand

Commit Message

Bob Pearson Sept. 1, 2022, 8:04 p.m. UTC
Move referencing pd in mr objects ahead of any possible errors
so that it will always be set in rxe_mr_complete() to avoid
seg faults when rxe_put(mr_pd(mr)) is called. Adjust the reference
counts so that each call to rxe_mr_init_xxx() takes one reference.
This reference count is dropped in rxe_mr_cleanup() in error paths
in the reg mr verbs and the dereg mr verb. Minor white space cleanups.

These errors have been seen in rxe_mr_init_user() when there isn't
enough memory to create the mr maps. Previously the error return
path didn't reach setting ibpd in mr->ibmr which caused a seg fault.

Fixes: 364e282c4fe7e ("RDMA/rxe: Split MEM into MR and MW")
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
v6:
  Separated from other patch in original series and resubmitted
  rebased to current for-rc.
  Renamed from "RDMA/rxe: Set pd early in mr alloc routines" to
  "RDMA/rxe: Fix pd ref counting in rxe mr verbs"
  Added more text to describe the change.
  Added fixes line.
  Simplified the patch by leaving pd code in rxe_mr.c instead of
  moving it up to rxe_verbs.c
v5:
  Dropped cleanup code from patch per Li Zhijian.
  Split up into two small patches.
v4:
  Added set mr->ibmr.pd back to avoid an -ENOMEM error causing
  rxe_put(mr_pd(mr)); to seg fault in rxe_mr_cleanup() since pd
  is not getting set in the error path.
v3:
  Rebased to apply to current for-next after
  	Revert "RDMA/rxe: Create duplicate mapping tables for FMRs"
v2:
  Moved setting mr->umem until after checks to avoid sending
  an ERR_PTR to ib_umem_release().
  Cleaned up umem and map sets if errors occur in alloc mr calls.
  Rebased to current for-next.
---
 drivers/infiniband/sw/rxe/rxe_mr.c    | 24 ++++++++++++++----------
 drivers/infiniband/sw/rxe/rxe_verbs.c | 27 +++++++--------------------
 2 files changed, 21 insertions(+), 30 deletions(-)


base-commit: 45baad7dd98f4d83f67c86c28769d3184390e324

Comments

Li Zhijian Sept. 2, 2022, 3:16 a.m. UTC | #1
On 02/09/2022 04:04, Bob Pearson wrote:
> Move referencing pd in mr objects ahead of any possible errors
> so that it will always be set in rxe_mr_complete() to avoid
> seg faults when rxe_put(mr_pd(mr)) is called. Adjust the reference
> counts so that each call to rxe_mr_init_xxx() takes one reference.
> This reference count is dropped in rxe_mr_cleanup() in error paths
> in the reg mr verbs and the dereg mr verb. Minor white space cleanups.
>
> These errors have been seen in rxe_mr_init_user() when there isn't
> enough memory to create the mr maps. Previously the error return
> path didn't reach setting ibpd in mr->ibmr which caused a seg fault.
>
> Fixes: 364e282c4fe7e ("RDMA/rxe: Split MEM into MR and MW")
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>

Reviewed-by Li Zhijian <lizhijian@fujitsu.com>

Thanks

> ---
> v6:
>    Separated from other patch in original series and resubmitted
>    rebased to current for-rc.
>    Renamed from "RDMA/rxe: Set pd early in mr alloc routines" to
>    "RDMA/rxe: Fix pd ref counting in rxe mr verbs"
>    Added more text to describe the change.
>    Added fixes line.
>    Simplified the patch by leaving pd code in rxe_mr.c instead of
>    moving it up to rxe_verbs.c
> v5:
>    Dropped cleanup code from patch per Li Zhijian.
>    Split up into two small patches.
> v4:
>    Added set mr->ibmr.pd back to avoid an -ENOMEM error causing
>    rxe_put(mr_pd(mr)); to seg fault in rxe_mr_cleanup() since pd
>    is not getting set in the error path.
> v3:
>    Rebased to apply to current for-next after
>    	Revert "RDMA/rxe: Create duplicate mapping tables for FMRs"
> v2:
>    Moved setting mr->umem until after checks to avoid sending
>    an ERR_PTR to ib_umem_release().
>    Cleaned up umem and map sets if errors occur in alloc mr calls.
>    Rebased to current for-next.
> ---
>   drivers/infiniband/sw/rxe/rxe_mr.c    | 24 ++++++++++++++----------
>   drivers/infiniband/sw/rxe/rxe_verbs.c | 27 +++++++--------------------
>   2 files changed, 21 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
> index 850b80f5ad8b..5f4daffccb40 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mr.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mr.c
> @@ -107,7 +107,9 @@ void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
>   {
>   	rxe_mr_init(access, mr);
>   
> +	rxe_get(pd);
>   	mr->ibmr.pd = &pd->ibpd;
> +
>   	mr->access = access;
>   	mr->state = RXE_MR_STATE_VALID;
>   	mr->type = IB_MR_TYPE_DMA;
> @@ -125,9 +127,12 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
>   	int err;
>   	int i;
>   
> +	rxe_get(pd);
> +	mr->ibmr.pd = &pd->ibpd;
> +
>   	umem = ib_umem_get(pd->ibpd.device, start, length, access);
>   	if (IS_ERR(umem)) {
> -		pr_warn("%s: Unable to pin memory region err = %d\n",
> +		pr_debug("%s: Unable to pin memory region err = %d\n",
>   			__func__, (int)PTR_ERR(umem));
>   		err = PTR_ERR(umem);
>   		goto err_out;
> @@ -139,7 +144,7 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
>   
>   	err = rxe_mr_alloc(mr, num_buf);
>   	if (err) {
> -		pr_warn("%s: Unable to allocate memory for map\n",
> +		pr_debug("%s: Unable to allocate memory for map\n",
>   				__func__);
>   		goto err_release_umem;
>   	}
> @@ -147,7 +152,7 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
>   	mr->page_shift = PAGE_SHIFT;
>   	mr->page_mask = PAGE_SIZE - 1;
>   
> -	num_buf			= 0;
> +	num_buf = 0;
>   	map = mr->map;
>   	if (length > 0) {
>   		buf = map[0]->buf;
> @@ -161,7 +166,7 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
>   
>   			vaddr = page_address(sg_page_iter_page(&sg_iter));
>   			if (!vaddr) {
> -				pr_warn("%s: Unable to get virtual address\n",
> +				pr_debug("%s: Unable to get virtual address\n",
>   						__func__);
>   				err = -ENOMEM;
>   				goto err_cleanup_map;
> @@ -175,7 +180,6 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
>   		}
>   	}
>   
> -	mr->ibmr.pd = &pd->ibpd;
>   	mr->umem = umem;
>   	mr->access = access;
>   	mr->length = length;
> @@ -201,22 +205,21 @@ int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
>   {
>   	int err;
>   
> +	rxe_get(pd);
> +	mr->ibmr.pd = &pd->ibpd;
> +
>   	/* always allow remote access for FMRs */
>   	rxe_mr_init(IB_ACCESS_REMOTE, mr);
>   
>   	err = rxe_mr_alloc(mr, max_pages);
>   	if (err)
> -		goto err1;
> +		return err;
>   
> -	mr->ibmr.pd = &pd->ibpd;
>   	mr->max_buf = max_pages;
>   	mr->state = RXE_MR_STATE_FREE;
>   	mr->type = IB_MR_TYPE_MEM_REG;
>   
>   	return 0;
> -
> -err1:
> -	return err;
>   }
>   
>   static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
> @@ -630,6 +633,7 @@ void rxe_mr_cleanup(struct rxe_pool_elem *elem)
>   	int i;
>   
>   	rxe_put(mr_pd(mr));
> +
>   	ib_umem_release(mr->umem);
>   
>   	if (mr->map) {
> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
> index e264cf69bf55..95df3b04babc 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.c
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
> @@ -902,18 +902,15 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
>   	if (!mr)
>   		return ERR_PTR(-ENOMEM);
>   
> -	rxe_get(pd);
>   	rxe_mr_init_dma(pd, access, mr);
>   	rxe_finalize(mr);
>   
>   	return &mr->ibmr;
>   }
>   
> -static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
> -				     u64 start,
> -				     u64 length,
> -				     u64 iova,
> -				     int access, struct ib_udata *udata)
> +static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
> +				     u64 length, u64 iova, int access,
> +				     struct ib_udata *udata)
>   {
>   	int err;
>   	struct rxe_dev *rxe = to_rdev(ibpd->device);
> @@ -921,26 +918,19 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
>   	struct rxe_mr *mr;
>   
>   	mr = rxe_alloc(&rxe->mr_pool);
> -	if (!mr) {
> -		err = -ENOMEM;
> -		goto err2;
> -	}
> -
> -
> -	rxe_get(pd);
> +	if (!mr)
> +		return ERR_PTR(-ENOMEM);
>   
>   	err = rxe_mr_init_user(pd, start, length, iova, access, mr);
>   	if (err)
> -		goto err3;
> +		goto err_cleanup;
>   
>   	rxe_finalize(mr);
>   
>   	return &mr->ibmr;
>   
> -err3:
> -	rxe_put(pd);
> +err_cleanup:
>   	rxe_cleanup(mr);
> -err2:
>   	return ERR_PTR(err);
>   }
>   
> @@ -961,8 +951,6 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
>   		goto err1;
>   	}
>   
> -	rxe_get(pd);
> -
>   	err = rxe_mr_init_fast(pd, max_num_sg, mr);
>   	if (err)
>   		goto err2;
> @@ -972,7 +960,6 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
>   	return &mr->ibmr;
>   
>   err2:
> -	rxe_put(pd);
>   	rxe_cleanup(mr);
>   err1:
>   	return ERR_PTR(err);
>
> base-commit: 45baad7dd98f4d83f67c86c28769d3184390e324
Leon Romanovsky Sept. 5, 2022, 12:22 p.m. UTC | #2
On Thu, Sep 01, 2022 at 03:04:27PM -0500, Bob Pearson wrote:
> Move referencing pd in mr objects ahead of any possible errors
> so that it will always be set in rxe_mr_complete() to avoid
> seg faults when rxe_put(mr_pd(mr)) is called. Adjust the reference
> counts so that each call to rxe_mr_init_xxx() takes one reference.
> This reference count is dropped in rxe_mr_cleanup() in error paths
> in the reg mr verbs and the dereg mr verb. Minor white space cleanups.
> 
> These errors have been seen in rxe_mr_init_user() when there isn't
> enough memory to create the mr maps. Previously the error return
> path didn't reach setting ibpd in mr->ibmr which caused a seg fault.
> 
> Fixes: 364e282c4fe7e ("RDMA/rxe: Split MEM into MR and MW")
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> ---
> v6:
>   Separated from other patch in original series and resubmitted
>   rebased to current for-rc.
>   Renamed from "RDMA/rxe: Set pd early in mr alloc routines" to
>   "RDMA/rxe: Fix pd ref counting in rxe mr verbs"
>   Added more text to describe the change.
>   Added fixes line.
>   Simplified the patch by leaving pd code in rxe_mr.c instead of
>   moving it up to rxe_verbs.c
> v5:
>   Dropped cleanup code from patch per Li Zhijian.
>   Split up into two small patches.
> v4:
>   Added set mr->ibmr.pd back to avoid an -ENOMEM error causing
>   rxe_put(mr_pd(mr)); to seg fault in rxe_mr_cleanup() since pd
>   is not getting set in the error path.
> v3:
>   Rebased to apply to current for-next after
>   	Revert "RDMA/rxe: Create duplicate mapping tables for FMRs"
> v2:
>   Moved setting mr->umem until after checks to avoid sending
>   an ERR_PTR to ib_umem_release().
>   Cleaned up umem and map sets if errors occur in alloc mr calls.
>   Rebased to current for-next.
> ---
>  drivers/infiniband/sw/rxe/rxe_mr.c    | 24 ++++++++++++++----------
>  drivers/infiniband/sw/rxe/rxe_verbs.c | 27 +++++++--------------------
>  2 files changed, 21 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
> index 850b80f5ad8b..5f4daffccb40 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mr.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mr.c
> @@ -107,7 +107,9 @@ void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
>  {
>  	rxe_mr_init(access, mr);
>  
> +	rxe_get(pd);

rxe_get() can fail, why don't you check for failure here and in all
places?

Thanks
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 850b80f5ad8b..5f4daffccb40 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -107,7 +107,9 @@  void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
 {
 	rxe_mr_init(access, mr);
 
+	rxe_get(pd);
 	mr->ibmr.pd = &pd->ibpd;
+
 	mr->access = access;
 	mr->state = RXE_MR_STATE_VALID;
 	mr->type = IB_MR_TYPE_DMA;
@@ -125,9 +127,12 @@  int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 	int err;
 	int i;
 
+	rxe_get(pd);
+	mr->ibmr.pd = &pd->ibpd;
+
 	umem = ib_umem_get(pd->ibpd.device, start, length, access);
 	if (IS_ERR(umem)) {
-		pr_warn("%s: Unable to pin memory region err = %d\n",
+		pr_debug("%s: Unable to pin memory region err = %d\n",
 			__func__, (int)PTR_ERR(umem));
 		err = PTR_ERR(umem);
 		goto err_out;
@@ -139,7 +144,7 @@  int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 
 	err = rxe_mr_alloc(mr, num_buf);
 	if (err) {
-		pr_warn("%s: Unable to allocate memory for map\n",
+		pr_debug("%s: Unable to allocate memory for map\n",
 				__func__);
 		goto err_release_umem;
 	}
@@ -147,7 +152,7 @@  int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 	mr->page_shift = PAGE_SHIFT;
 	mr->page_mask = PAGE_SIZE - 1;
 
-	num_buf			= 0;
+	num_buf = 0;
 	map = mr->map;
 	if (length > 0) {
 		buf = map[0]->buf;
@@ -161,7 +166,7 @@  int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 
 			vaddr = page_address(sg_page_iter_page(&sg_iter));
 			if (!vaddr) {
-				pr_warn("%s: Unable to get virtual address\n",
+				pr_debug("%s: Unable to get virtual address\n",
 						__func__);
 				err = -ENOMEM;
 				goto err_cleanup_map;
@@ -175,7 +180,6 @@  int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 		}
 	}
 
-	mr->ibmr.pd = &pd->ibpd;
 	mr->umem = umem;
 	mr->access = access;
 	mr->length = length;
@@ -201,22 +205,21 @@  int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
 {
 	int err;
 
+	rxe_get(pd);
+	mr->ibmr.pd = &pd->ibpd;
+
 	/* always allow remote access for FMRs */
 	rxe_mr_init(IB_ACCESS_REMOTE, mr);
 
 	err = rxe_mr_alloc(mr, max_pages);
 	if (err)
-		goto err1;
+		return err;
 
-	mr->ibmr.pd = &pd->ibpd;
 	mr->max_buf = max_pages;
 	mr->state = RXE_MR_STATE_FREE;
 	mr->type = IB_MR_TYPE_MEM_REG;
 
 	return 0;
-
-err1:
-	return err;
 }
 
 static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
@@ -630,6 +633,7 @@  void rxe_mr_cleanup(struct rxe_pool_elem *elem)
 	int i;
 
 	rxe_put(mr_pd(mr));
+
 	ib_umem_release(mr->umem);
 
 	if (mr->map) {
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index e264cf69bf55..95df3b04babc 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -902,18 +902,15 @@  static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	rxe_get(pd);
 	rxe_mr_init_dma(pd, access, mr);
 	rxe_finalize(mr);
 
 	return &mr->ibmr;
 }
 
-static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
-				     u64 start,
-				     u64 length,
-				     u64 iova,
-				     int access, struct ib_udata *udata)
+static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
+				     u64 length, u64 iova, int access,
+				     struct ib_udata *udata)
 {
 	int err;
 	struct rxe_dev *rxe = to_rdev(ibpd->device);
@@ -921,26 +918,19 @@  static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
 	struct rxe_mr *mr;
 
 	mr = rxe_alloc(&rxe->mr_pool);
-	if (!mr) {
-		err = -ENOMEM;
-		goto err2;
-	}
-
-
-	rxe_get(pd);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
 
 	err = rxe_mr_init_user(pd, start, length, iova, access, mr);
 	if (err)
-		goto err3;
+		goto err_cleanup;
 
 	rxe_finalize(mr);
 
 	return &mr->ibmr;
 
-err3:
-	rxe_put(pd);
+err_cleanup:
 	rxe_cleanup(mr);
-err2:
 	return ERR_PTR(err);
 }
 
@@ -961,8 +951,6 @@  static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 		goto err1;
 	}
 
-	rxe_get(pd);
-
 	err = rxe_mr_init_fast(pd, max_num_sg, mr);
 	if (err)
 		goto err2;
@@ -972,7 +960,6 @@  static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 	return &mr->ibmr;
 
 err2:
-	rxe_put(pd);
 	rxe_cleanup(mr);
 err1:
 	return ERR_PTR(err);