diff mbox series

[RESEND,rdma-rc] RDMA/mlx5: Add dummy umem to IB_MR_TYPE_DM

Message ID e13b7014857ea296285ee5cfcdaaada9007f6978.1634638695.git.leonro@nvidia.com (mailing list archive)
State Changes Requested
Delegated to: Jason Gunthorpe
Headers show
Series [RESEND,rdma-rc] RDMA/mlx5: Add dummy umem to IB_MR_TYPE_DM | expand

Commit Message

Leon Romanovsky Oct. 19, 2021, 10:23 a.m. UTC
From: Alaa Hleihel <alaa@nvidia.com>

After the cited patch, and for the case of IB_MR_TYPE_DM that doesn't
have a umem (even though it is a user MR), function mlx5_free_priv_descs()
will think that it's a kernel MR, leading to wrongly accessing mr->descs
that will get wrong values in the union which leads to attempting to
release resources that were not allocated in the first place.

For example:
 DMA-API: mlx5_core 0000:08:00.1: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=0 bytes]
 WARNING: CPU: 8 PID: 1021 at kernel/dma/debug.c:961 check_unmap+0x54f/0x8b0
 RIP: 0010:check_unmap+0x54f/0x8b0
 Call Trace:
  debug_dma_unmap_page+0x57/0x60
  mlx5_free_priv_descs+0x57/0x70 [mlx5_ib]
  mlx5_ib_dereg_mr+0x1fb/0x3d0 [mlx5_ib]
  ib_dereg_mr_user+0x60/0x140 [ib_core]
  uverbs_destroy_uobject+0x59/0x210 [ib_uverbs]
  uobj_destroy+0x3f/0x80 [ib_uverbs]
  ib_uverbs_cmd_verbs+0x435/0xd10 [ib_uverbs]
  ? uverbs_finalize_object+0x50/0x50 [ib_uverbs]
  ? lock_acquire+0xc4/0x2e0
  ? lock_acquired+0x12/0x380
  ? lock_acquire+0xc4/0x2e0
  ? lock_acquire+0xc4/0x2e0
  ? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs]
  ? lock_release+0x28a/0x400
  ib_uverbs_ioctl+0xc0/0x140 [ib_uverbs]
  ? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs]
  __x64_sys_ioctl+0x7f/0xb0
  do_syscall_64+0x38/0x90

Fix it by adding a dummy umem to IB_MR_TYPE_DM MRs.

Fixes: f18ec4223117 ("RDMA/mlx5: Use a union inside mlx5_ib_mr")
Signed-off-by: Alaa Hleihel <alaa@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
RESEND: https://lore.kernel.org/all/9c6478b70dc23cfec3a7bfc345c30ff817e7e799.1631660866.git.leonro@nvidia.com

Our request to drop that original patch was because mr-->umem pointer is checked
in rereg flow for the DM MRs with expectation to have NULL there. However DM is
blocked for the rereg path in the commit 5ccbf63f87a3 ("IB/uverbs: Prevent
reregistration of DM_MR to regular MR"), and the checks in mlx5_ib are redundant.

Thanks
---
 drivers/infiniband/core/umem.c  | 21 +++++++++++++++++++++
 drivers/infiniband/hw/mlx5/mr.c |  5 +++++
 include/rdma/ib_umem.h          |  5 +++++
 3 files changed, 31 insertions(+)

Comments

Jason Gunthorpe Oct. 25, 2021, 5:31 p.m. UTC | #1
On Tue, Oct 19, 2021 at 01:23:13PM +0300, Leon Romanovsky wrote:
> From: Alaa Hleihel <alaa@nvidia.com>
> 
> After the cited patch, and for the case of IB_MR_TYPE_DM that doesn't
> have a umem (even though it is a user MR), function mlx5_free_priv_descs()
> will think that it's a kernel MR, leading to wrongly accessing mr->descs
> that will get wrong values in the union which leads to attempting to
> release resources that were not allocated in the first place.
> 
> For example:
>  DMA-API: mlx5_core 0000:08:00.1: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=0 bytes]
>  WARNING: CPU: 8 PID: 1021 at kernel/dma/debug.c:961 check_unmap+0x54f/0x8b0
>  RIP: 0010:check_unmap+0x54f/0x8b0
>  Call Trace:
>   debug_dma_unmap_page+0x57/0x60
>   mlx5_free_priv_descs+0x57/0x70 [mlx5_ib]
>   mlx5_ib_dereg_mr+0x1fb/0x3d0 [mlx5_ib]
>   ib_dereg_mr_user+0x60/0x140 [ib_core]
>   uverbs_destroy_uobject+0x59/0x210 [ib_uverbs]
>   uobj_destroy+0x3f/0x80 [ib_uverbs]
>   ib_uverbs_cmd_verbs+0x435/0xd10 [ib_uverbs]
>   ? uverbs_finalize_object+0x50/0x50 [ib_uverbs]
>   ? lock_acquire+0xc4/0x2e0
>   ? lock_acquired+0x12/0x380
>   ? lock_acquire+0xc4/0x2e0
>   ? lock_acquire+0xc4/0x2e0
>   ? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs]
>   ? lock_release+0x28a/0x400
>   ib_uverbs_ioctl+0xc0/0x140 [ib_uverbs]
>   ? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs]
>   __x64_sys_ioctl+0x7f/0xb0
>   do_syscall_64+0x38/0x90
> 
> Fix it by adding a dummy umem to IB_MR_TYPE_DM MRs.
> 
> Fixes: f18ec4223117 ("RDMA/mlx5: Use a union inside mlx5_ib_mr")
> Signed-off-by: Alaa Hleihel <alaa@nvidia.com>
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> RESEND: https://lore.kernel.org/all/9c6478b70dc23cfec3a7bfc345c30ff817e7e799.1631660866.git.leonro@nvidia.com
> 
> Our request to drop that original patch was because mr-->umem pointer is checked
> in rereg flow for the DM MRs with expectation to have NULL there. However DM is
> blocked for the rereg path in the commit 5ccbf63f87a3 ("IB/uverbs: Prevent
> reregistration of DM_MR to regular MR"), and the checks in mlx5_ib are redundant.

That logic in the core code is bogus and should be deleted.

It is perfeclty fine to use rereg to change the access flags on a DM
MR and mlx5 now implements that.

So let's not go down a path that blocks it.

Like this instead:

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index e636e954f6bf2a..4a7a56ed740b9b 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -664,7 +664,6 @@ struct mlx5_ib_mr {
 
 	/* User MR data */
 	struct mlx5_cache_ent *cache_ent;
-	struct ib_umem *umem;
 
 	/* This is zero'd when the MR is allocated */
 	union {
@@ -676,7 +675,7 @@ struct mlx5_ib_mr {
 			struct list_head list;
 		};
 
-		/* Used only by kernel MRs (umem == NULL) */
+		/* Used only by kernel MRs */
 		struct {
 			void *descs;
 			void *descs_alloc;
@@ -697,8 +696,9 @@ struct mlx5_ib_mr {
 			int data_length;
 		};
 
-		/* Used only by User MRs (umem != NULL) */
+		/* Used only by User MRs */
 		struct {
+			struct ib_umem *umem;
 			unsigned int page_shift;
 			/* Current access_flags */
 			int access_flags;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 221f0949794e35..997d133d00369d 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1904,19 +1904,19 @@ mlx5_alloc_priv_descs(struct ib_device *device,
 	return ret;
 }
 
-static void
-mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
+static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
 {
-	if (!mr->umem && mr->descs) {
-		struct ib_device *device = mr->ibmr.device;
-		int size = mr->max_descs * mr->desc_size;
-		struct mlx5_ib_dev *dev = to_mdev(device);
+	struct ib_device *device = mr->ibmr.device;
+	int size = mr->max_descs * mr->desc_size;
+	struct mlx5_ib_dev *dev = to_mdev(device);
 
-		dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
-				 DMA_TO_DEVICE);
-		kfree(mr->descs_alloc);
-		mr->descs = NULL;
-	}
+	if (!mr->descs)
+		return;
+
+	dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
+			 DMA_TO_DEVICE);
+	kfree(mr->descs_alloc);
+	mr->descs = NULL;
 }
 
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
@@ -1978,7 +1978,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 			return rc;
 	}
 
-	if (mr->umem) {
+	if (udata && mr->umem) {
 		bool is_odp = is_odp_mr(mr);
 
 		if (!is_odp)
@@ -1992,7 +1992,8 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 	if (mr->cache_ent) {
 		mlx5_mr_cache_free(dev, mr);
 	} else {
-		mlx5_free_priv_descs(mr);
+		if (!udata)
+			mlx5_free_priv_descs(mr);
 		kfree(mr);
 	}
 	return 0;
@@ -2079,7 +2080,6 @@ static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
 	if (err)
 		goto err_free_in;
 
-	mr->umem = NULL;
 	kfree(in);
 
 	return mr;
@@ -2206,7 +2206,6 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
 	}
 
 	mr->ibmr.device = pd->device;
-	mr->umem = NULL;
 
 	switch (mr_type) {
 	case IB_MR_TYPE_MEM_REG:
Leon Romanovsky Oct. 26, 2021, 11:23 a.m. UTC | #2
On Mon, Oct 25, 2021 at 02:31:50PM -0300, Jason Gunthorpe wrote:
> On Tue, Oct 19, 2021 at 01:23:13PM +0300, Leon Romanovsky wrote:
> > From: Alaa Hleihel <alaa@nvidia.com>
> > 
> > After the cited patch, and for the case of IB_MR_TYPE_DM that doesn't
> > have a umem (even though it is a user MR), function mlx5_free_priv_descs()
> > will think that it's a kernel MR, leading to wrongly accessing mr->descs
> > that will get wrong values in the union which leads to attempting to
> > release resources that were not allocated in the first place.
> > 
> > For example:
> >  DMA-API: mlx5_core 0000:08:00.1: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=0 bytes]
> >  WARNING: CPU: 8 PID: 1021 at kernel/dma/debug.c:961 check_unmap+0x54f/0x8b0
> >  RIP: 0010:check_unmap+0x54f/0x8b0
> >  Call Trace:
> >   debug_dma_unmap_page+0x57/0x60
> >   mlx5_free_priv_descs+0x57/0x70 [mlx5_ib]
> >   mlx5_ib_dereg_mr+0x1fb/0x3d0 [mlx5_ib]
> >   ib_dereg_mr_user+0x60/0x140 [ib_core]
> >   uverbs_destroy_uobject+0x59/0x210 [ib_uverbs]
> >   uobj_destroy+0x3f/0x80 [ib_uverbs]
> >   ib_uverbs_cmd_verbs+0x435/0xd10 [ib_uverbs]
> >   ? uverbs_finalize_object+0x50/0x50 [ib_uverbs]
> >   ? lock_acquire+0xc4/0x2e0
> >   ? lock_acquired+0x12/0x380
> >   ? lock_acquire+0xc4/0x2e0
> >   ? lock_acquire+0xc4/0x2e0
> >   ? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs]
> >   ? lock_release+0x28a/0x400
> >   ib_uverbs_ioctl+0xc0/0x140 [ib_uverbs]
> >   ? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs]
> >   __x64_sys_ioctl+0x7f/0xb0
> >   do_syscall_64+0x38/0x90
> > 
> > Fix it by adding a dummy umem to IB_MR_TYPE_DM MRs.
> > 
> > Fixes: f18ec4223117 ("RDMA/mlx5: Use a union inside mlx5_ib_mr")
> > Signed-off-by: Alaa Hleihel <alaa@nvidia.com>
> > Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> > RESEND: https://lore.kernel.org/all/9c6478b70dc23cfec3a7bfc345c30ff817e7e799.1631660866.git.leonro@nvidia.com
> > 
> > Our request to drop that original patch was because mr-->umem pointer is checked
> > in rereg flow for the DM MRs with expectation to have NULL there. However DM is
> > blocked for the rereg path in the commit 5ccbf63f87a3 ("IB/uverbs: Prevent
> > reregistration of DM_MR to regular MR"), and the checks in mlx5_ib are redundant.
> 
> That logic in the core code is bogus and should be deleted.
> 
> It is perfeclty fine to use rereg to change the access flags on a DM
> MR and mlx5 now implements that.
> 
> So let's not go down a path that blocks it.
> 
> Like this instead:

Thanks, let's give a try.

> 
> diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> index e636e954f6bf2a..4a7a56ed740b9b 100644
> --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
> +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> @@ -664,7 +664,6 @@ struct mlx5_ib_mr {
>  
>  	/* User MR data */
>  	struct mlx5_cache_ent *cache_ent;
> -	struct ib_umem *umem;
>  
>  	/* This is zero'd when the MR is allocated */
>  	union {
> @@ -676,7 +675,7 @@ struct mlx5_ib_mr {
>  			struct list_head list;
>  		};
>  
> -		/* Used only by kernel MRs (umem == NULL) */
> +		/* Used only by kernel MRs */
>  		struct {
>  			void *descs;
>  			void *descs_alloc;
> @@ -697,8 +696,9 @@ struct mlx5_ib_mr {
>  			int data_length;
>  		};
>  
> -		/* Used only by User MRs (umem != NULL) */
> +		/* Used only by User MRs */
>  		struct {
> +			struct ib_umem *umem;
>  			unsigned int page_shift;
>  			/* Current access_flags */
>  			int access_flags;
> diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
> index 221f0949794e35..997d133d00369d 100644
> --- a/drivers/infiniband/hw/mlx5/mr.c
> +++ b/drivers/infiniband/hw/mlx5/mr.c
> @@ -1904,19 +1904,19 @@ mlx5_alloc_priv_descs(struct ib_device *device,
>  	return ret;
>  }
>  
> -static void
> -mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
> +static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
>  {
> -	if (!mr->umem && mr->descs) {
> -		struct ib_device *device = mr->ibmr.device;
> -		int size = mr->max_descs * mr->desc_size;
> -		struct mlx5_ib_dev *dev = to_mdev(device);
> +	struct ib_device *device = mr->ibmr.device;
> +	int size = mr->max_descs * mr->desc_size;
> +	struct mlx5_ib_dev *dev = to_mdev(device);
>  
> -		dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
> -				 DMA_TO_DEVICE);
> -		kfree(mr->descs_alloc);
> -		mr->descs = NULL;
> -	}
> +	if (!mr->descs)
> +		return;
> +
> +	dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
> +			 DMA_TO_DEVICE);
> +	kfree(mr->descs_alloc);
> +	mr->descs = NULL;
>  }
>  
>  int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
> @@ -1978,7 +1978,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
>  			return rc;
>  	}
>  
> -	if (mr->umem) {
> +	if (udata && mr->umem) {
>  		bool is_odp = is_odp_mr(mr);
>  
>  		if (!is_odp)
> @@ -1992,7 +1992,8 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
>  	if (mr->cache_ent) {
>  		mlx5_mr_cache_free(dev, mr);
>  	} else {
> -		mlx5_free_priv_descs(mr);
> +		if (!udata)
> +			mlx5_free_priv_descs(mr);
>  		kfree(mr);
>  	}
>  	return 0;
> @@ -2079,7 +2080,6 @@ static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
>  	if (err)
>  		goto err_free_in;
>  
> -	mr->umem = NULL;
>  	kfree(in);
>  
>  	return mr;
> @@ -2206,7 +2206,6 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
>  	}
>  
>  	mr->ibmr.device = pd->device;
> -	mr->umem = NULL;
>  
>  	switch (mr_type) {
>  	case IB_MR_TYPE_MEM_REG:
diff mbox series

Patch

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 86d479772fbc..a2f9c922bdd9 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -260,6 +260,27 @@  struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 }
 EXPORT_SYMBOL(ib_umem_get);
 
+/**
+ * ib_umem_get_dummy - Create an empty umem
+ *
+ * @device: IB device to connect UMEM
+ */
+struct ib_umem *ib_umem_get_dummy(struct ib_device *device)
+{
+	struct ib_umem *umem;
+
+	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+	if (!umem)
+		return ERR_PTR(-ENOMEM);
+
+	umem->ibdev = device;
+	umem->owning_mm = current->mm;
+	mmgrab(umem->owning_mm);
+
+	return umem;
+}
+EXPORT_SYMBOL(ib_umem_get_dummy);
+
 /**
  * ib_umem_release - release memory pinned with ib_umem_get
  * @umem: umem struct to release
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3be36ebbf67a..6fbc281a8881 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1389,6 +1389,11 @@  static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
 	kfree(in);
 
 	set_mr_fields(dev, mr, length, acc);
+	mr->umem = ib_umem_get_dummy(&dev->ib_dev);
+	if (IS_ERR(mr->umem)) {
+		err = PTR_ERR(mr->umem);
+		goto err_free;
+	}
 
 	return &mr->ibmr;
 
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 5ae9dff74dac..fd20b1610050 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -98,6 +98,7 @@  static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
 
 struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 			    size_t size, int access);
+struct ib_umem *ib_umem_get_dummy(struct ib_device *device);
 void ib_umem_release(struct ib_umem *umem);
 int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
 		      size_t length);
@@ -153,6 +154,10 @@  static inline struct ib_umem *ib_umem_get(struct ib_device *device,
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
+static struct ib_umem *ib_umem_get_dummy(struct ib_device *device)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
 static inline void ib_umem_release(struct ib_umem *umem) { }
 static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
 		      		    size_t length) {