
[rdma-next,v1,5/6] RDMA/mlx5: Change check for cacheable user mkeys

Message ID: 20dc8ea1c606351b0491240d857977f724084345.1706433934.git.leon@kernel.org
State: Superseded
Series: Collection of mlx5_ib fixes

Commit Message

Leon Romanovsky Jan. 28, 2024, 9:29 a.m. UTC
From: Or Har-Toov <ohartoov@nvidia.com>

In the dereg flow, the UMEM is not a reliable indication of whether an
MR came from userspace, since mlx5_ib_rereg_user_mr has paths in which
a new MR is created and the UMEM of the old MR is set to NULL.
Currently, when mlx5_ib_dereg_mr is called on the old MR, its UMEM is
NULL but its cache_ent can still be non-NULL, so the mkey is never
destroyed.
Therefore, whether an mkey came from a user application and is
cacheable should be determined by checking whether rb_key or cache_ent
exists; all other kinds of mkeys should be destroyed.
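
For illustration, the problematic sequence is roughly the following
(a sketch of the rereg recreate path, not verbatim kernel code):

	/* mlx5_ib_rereg_user_mr(): the recreate path hands the umem
	 * over to a brand-new MR and clears it on the old one.
	 */
	umem = mr->umem;
	mr->umem = NULL;	/* old MR keeps cache_ent, loses its UMEM */
	return create_real_mr(new_pd, umem, ...);

	/* Later, mlx5_ib_dereg_mr() on the old MR sees mr->umem == NULL,
	 * skips the "Stop DMA"/cache path, and never destroys the mkey
	 * even though mr->mmkey.cache_ent is still set.
	 */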

Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mr.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

Comments

Jason Gunthorpe Jan. 29, 2024, 5:52 p.m. UTC | #1
On Sun, Jan 28, 2024 at 11:29:15AM +0200, Leon Romanovsky wrote:
> From: Or Har-Toov <ohartoov@nvidia.com>
> 
> In the dereg flow, the UMEM is not a reliable indication of whether an
> MR came from userspace, since mlx5_ib_rereg_user_mr has paths in which
> a new MR is created and the UMEM of the old MR is set to NULL.

Why is this a problem though? The only thing the umem has to do is to
trigger the UMR optimization. If UMR is not triggered then the mkey is
destroyed and it shouldn't be part of the cache at all.

> Currently, when mlx5_ib_dereg_mr is called on the old MR, its UMEM is
> NULL but its cache_ent can still be non-NULL, so the mkey is never
> destroyed.
> Therefore, whether an mkey came from a user application and is
> cacheable should be determined by checking whether rb_key or cache_ent
> exists; all other kinds of mkeys should be destroyed.
> 
> Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
> Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> ---
>  drivers/infiniband/hw/mlx5/mr.c | 15 ++++++++-------
>  1 file changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
> index 12bca6ca4760..87552a689e07 100644
> --- a/drivers/infiniband/hw/mlx5/mr.c
> +++ b/drivers/infiniband/hw/mlx5/mr.c
> @@ -1857,6 +1857,11 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
>  	return ret;
>  }
>  
> +static bool is_cacheable_mkey(struct mlx5_ib_mkey *mkey)
> +{
> +	return mkey->cache_ent || mkey->rb_key.ndescs;
> +}
> +
>  int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
>  {
>  	struct mlx5_ib_mr *mr = to_mmr(ibmr);
> @@ -1901,12 +1906,6 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
>  		mr->sig = NULL;
>  	}
>  
> -	/* Stop DMA */
> -	if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length))
> -		if (mlx5r_umr_revoke_mr(mr) ||
> -		    cache_ent_find_and_store(dev, mr))
> -			mr->mmkey.cache_ent = NULL;
> -
>  	if (mr->umem && mr->umem->is_peer) {
>  		rc = mlx5r_umr_revoke_mr(mr);
>  		if (rc)

?? this isn't based on an upstream tree

> @@ -1914,7 +1913,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
>  		ib_umem_stop_invalidation_notifier(mr->umem);
>  	}
>  
> -	if (!mr->mmkey.cache_ent) {
> +	/* Stop DMA */
> +	if (!is_cacheable_mkey(&mr->mmkey) || mlx5r_umr_revoke_mr(mr) ||
> +	    cache_ent_find_and_store(dev, mr)) {

And now the mlx5r_umr_can_load_pas() check has been lost; that isn't
good. A non-UMR-able object should never be placed in the cache. If
the mkey's size is too big it has to be freed normally.

>  		rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
>  		if (rc)
>  			return rc;

I'm not sure it is right to re-order this? The revocation of an mkey
should be a single operation, whichever path we choose to take.

Regardless, the upstream code doesn't have this ordering, so it should
all be one sequence of revoking the mkey and synchronizing the cache.

I suggest putting the revoke sequence into one function:

static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);

	if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length)) {
		if (mlx5r_umr_revoke_mr(mr))
			goto destroy;

		if (cache_ent_find_and_store(dev, mr))
			goto destroy;
		return 0;
	}

destroy:
	if (mr->mmkey.cache_ent) {
		struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;

		/* Keep a local pointer so the unlock doesn't
		 * dereference the just-cleared cache_ent.
		 */
		spin_lock_irq(&ent->mkeys_queue.lock);
		ent->in_use--;
		mr->mmkey.cache_ent = NULL;
		spin_unlock_irq(&ent->mkeys_queue.lock);
	}
	return destroy_mkey(dev, mr);
}

(notice we probably shouldn't set cache_ent to NULL without adjusting in_use)
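
For reference, with that helper the dereg path could boil down to (my
sketch, same assumptions as above; rc is already declared in
mlx5_ib_dereg_mr):

	/* Stop DMA */
	rc = mlx5_revoke_mr(mr);
	if (rc)
		return rc;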

Jason
Leon Romanovsky Jan. 30, 2024, 1:47 p.m. UTC | #2
On Mon, Jan 29, 2024 at 01:52:39PM -0400, Jason Gunthorpe wrote:
> On Sun, Jan 28, 2024 at 11:29:15AM +0200, Leon Romanovsky wrote:
> > From: Or Har-Toov <ohartoov@nvidia.com>

<...>

> >  	if (mr->umem && mr->umem->is_peer) {
> >  		rc = mlx5r_umr_revoke_mr(mr);
> >  		if (rc)
> 
> ?? this isn't based on an upstream tree

Yes, it is my mistake. I will fix it.

Thanks

Patch

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 12bca6ca4760..87552a689e07 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1857,6 +1857,11 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
 	return ret;
 }
 
+static bool is_cacheable_mkey(struct mlx5_ib_mkey *mkey)
+{
+	return mkey->cache_ent || mkey->rb_key.ndescs;
+}
+
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 {
 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
@@ -1901,12 +1906,6 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 		mr->sig = NULL;
 	}
 
-	/* Stop DMA */
-	if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length))
-		if (mlx5r_umr_revoke_mr(mr) ||
-		    cache_ent_find_and_store(dev, mr))
-			mr->mmkey.cache_ent = NULL;
-
 	if (mr->umem && mr->umem->is_peer) {
 		rc = mlx5r_umr_revoke_mr(mr);
 		if (rc)
@@ -1914,7 +1913,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 		ib_umem_stop_invalidation_notifier(mr->umem);
 	}
 
-	if (!mr->mmkey.cache_ent) {
+	/* Stop DMA */
+	if (!is_cacheable_mkey(&mr->mmkey) || mlx5r_umr_revoke_mr(mr) ||
+	    cache_ent_find_and_store(dev, mr)) {
 		rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
 		if (rc)
 			return rc;