diff mbox series

drm/panthor: Lock XArray when getting entries for heap and VM

Message ID 20241106120748.290697-1-liviu.dudau@arm.com (mailing list archive)
State New, archived
Headers show
Series drm/panthor: Lock XArray when getting entries for heap and VM | expand

Commit Message

Liviu Dudau Nov. 6, 2024, 12:07 p.m. UTC
Similar to cac075706f29 ("drm/panthor: Fix race when converting
group handle to group object") we need to use the XArray's internal
locking when retrieving a pointer from there for heap and vm.

Reported-by: Jann Horn <jannh@google.com>
Cc: Boris Brezillon <boris.brezillon@collabora.com>
Cc: Steven Price <steven.price@arm.com>
Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
---
 drivers/gpu/drm/panthor/panthor_heap.c | 15 +++++++++++++--
 drivers/gpu/drm/panthor/panthor_mmu.c  |  2 ++
 2 files changed, 15 insertions(+), 2 deletions(-)

Comments

Mihail Atanassov Nov. 6, 2024, 12:14 p.m. UTC | #1
Hi Liviu,

On 06/11/2024 12:07, Liviu Dudau wrote:
> Similar to cac075706f29 ("drm/panthor: Fix race when converting
> group handle to group object") we need to use the XArray's internal
> locking when retrieving a pointer from there for heap and vm.
> 
> Reported-by: Jann Horn <jannh@google.com>
> Cc: Boris Brezillon <boris.brezillon@collabora.com>
> Cc: Steven Price <steven.price@arm.com>
> Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
> ---
>   drivers/gpu/drm/panthor/panthor_heap.c | 15 +++++++++++++--
>   drivers/gpu/drm/panthor/panthor_mmu.c  |  2 ++
>   2 files changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
> index 3796a9eb22af2..fe0bcb6837f74 100644
> --- a/drivers/gpu/drm/panthor/panthor_heap.c
> +++ b/drivers/gpu/drm/panthor/panthor_heap.c
> @@ -351,6 +351,17 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
>   	return ret;
>   }
>   
> +static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
> +{
> +	struct panthor_heap *heap;
> +
> +	xa_lock(&pool->xa);
> +	heap = xa_load(&pool->xa, id);
> +	xa_unlock(&pool->va);
> +
> +	return heap;
> +}
> +
>   /**
>    * panthor_heap_return_chunk() - Return an unused heap chunk
>    * @pool: The pool this heap belongs to.
> @@ -375,7 +386,7 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
>   		return -EINVAL;
>   
>   	down_read(&pool->lock);
> -	heap = xa_load(&pool->xa, heap_id);
> +	heap = panthor_heap_from_id(pool, heap_id);
>   	if (!heap) {
>   		ret = -EINVAL;
>   		goto out_unlock;
> @@ -438,7 +449,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool,
>   		return -EINVAL;
>   
>   	down_read(&pool->lock);
> -	heap = xa_load(&pool->xa, heap_id);
> +	heap = panthor_heap_from_id(pool, heap_id);
>   	if (!heap) {
>   		ret = -EINVAL;
>   		goto out_unlock;
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 8ca85526491e6..8b5cda9d21768 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -1580,7 +1580,9 @@ panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle)
>   {
>   	struct panthor_vm *vm;
>   
> +	xa_lock(&pool->xa);
>   	vm = panthor_vm_get(xa_load(&pool->xa, handle));
> +	xa_unlock(&pool->va);
>   
>   	return vm;
>   }

Reviewed-by: Mihail Atanassov <mihail.atanassov@arm.com>
Boris Brezillon Nov. 6, 2024, 12:16 p.m. UTC | #2
On Wed,  6 Nov 2024 12:07:48 +0000
Liviu Dudau <liviu.dudau@arm.com> wrote:

> Similar to cac075706f29 ("drm/panthor: Fix race when converting
> group handle to group object") we need to use the XArray's internal
> locking when retrieving a pointer from there for heap and vm.
> 
> Reported-by: Jann Horn <jannh@google.com>
> Cc: Boris Brezillon <boris.brezillon@collabora.com>
> Cc: Steven Price <steven.price@arm.com>
> Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
> ---
>  drivers/gpu/drm/panthor/panthor_heap.c | 15 +++++++++++++--
>  drivers/gpu/drm/panthor/panthor_mmu.c  |  2 ++
>  2 files changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
> index 3796a9eb22af2..fe0bcb6837f74 100644
> --- a/drivers/gpu/drm/panthor/panthor_heap.c
> +++ b/drivers/gpu/drm/panthor/panthor_heap.c
> @@ -351,6 +351,17 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
>  	return ret;
>  }
>  
> +static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)

struct pathor_heap_pool does not exist :-).

> +{
> +	struct panthor_heap *heap;
> +
> +	xa_lock(&pool->xa);
> +	heap = xa_load(&pool->xa, id);
> +	xa_unlock(&pool->va);

Access to panthor_heap_pool::xa is protected by the external
pathor_heap_pool::lock, so taking the xa lock seems redundant here. How
about adding a lockdep_assert_held(pool->lock) instead?

> +
> +	return heap;
> +}
> +
>  /**
>   * panthor_heap_return_chunk() - Return an unused heap chunk
>   * @pool: The pool this heap belongs to.
> @@ -375,7 +386,7 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
>  		return -EINVAL;
>  
>  	down_read(&pool->lock);
> -	heap = xa_load(&pool->xa, heap_id);
> +	heap = panthor_heap_from_id(pool, heap_id);
>  	if (!heap) {
>  		ret = -EINVAL;
>  		goto out_unlock;
> @@ -438,7 +449,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool,
>  		return -EINVAL;
>  
>  	down_read(&pool->lock);
> -	heap = xa_load(&pool->xa, heap_id);
> +	heap = panthor_heap_from_id(pool, heap_id);
>  	if (!heap) {
>  		ret = -EINVAL;
>  		goto out_unlock;
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 8ca85526491e6..8b5cda9d21768 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -1580,7 +1580,9 @@ panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle)
>  {
>  	struct panthor_vm *vm;
>  
> +	xa_lock(&pool->xa);
>  	vm = panthor_vm_get(xa_load(&pool->xa, handle));
> +	xa_unlock(&pool->va);
>  
>  	return vm;
>  }
Boris Brezillon Nov. 6, 2024, 12:17 p.m. UTC | #3
On Wed,  6 Nov 2024 12:07:48 +0000
Liviu Dudau <liviu.dudau@arm.com> wrote:

> Similar to cac075706f29 ("drm/panthor: Fix race when converting
> group handle to group object") we need to use the XArray's internal
> locking when retrieving a pointer from there for heap and vm.
> 
> Reported-by: Jann Horn <jannh@google.com>
> Cc: Boris Brezillon <boris.brezillon@collabora.com>
> Cc: Steven Price <steven.price@arm.com>
> Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>

Missing Fixes tag.

> ---
>  drivers/gpu/drm/panthor/panthor_heap.c | 15 +++++++++++++--
>  drivers/gpu/drm/panthor/panthor_mmu.c  |  2 ++
>  2 files changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
> index 3796a9eb22af2..fe0bcb6837f74 100644
> --- a/drivers/gpu/drm/panthor/panthor_heap.c
> +++ b/drivers/gpu/drm/panthor/panthor_heap.c
> @@ -351,6 +351,17 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
>  	return ret;
>  }
>  
> +static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
> +{
> +	struct panthor_heap *heap;
> +
> +	xa_lock(&pool->xa);
> +	heap = xa_load(&pool->xa, id);
> +	xa_unlock(&pool->va);
> +
> +	return heap;
> +}
> +
>  /**
>   * panthor_heap_return_chunk() - Return an unused heap chunk
>   * @pool: The pool this heap belongs to.
> @@ -375,7 +386,7 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
>  		return -EINVAL;
>  
>  	down_read(&pool->lock);
> -	heap = xa_load(&pool->xa, heap_id);
> +	heap = panthor_heap_from_id(pool, heap_id);
>  	if (!heap) {
>  		ret = -EINVAL;
>  		goto out_unlock;
> @@ -438,7 +449,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool,
>  		return -EINVAL;
>  
>  	down_read(&pool->lock);
> -	heap = xa_load(&pool->xa, heap_id);
> +	heap = panthor_heap_from_id(pool, heap_id);
>  	if (!heap) {
>  		ret = -EINVAL;
>  		goto out_unlock;
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 8ca85526491e6..8b5cda9d21768 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -1580,7 +1580,9 @@ panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle)
>  {
>  	struct panthor_vm *vm;
>  
> +	xa_lock(&pool->xa);
>  	vm = panthor_vm_get(xa_load(&pool->xa, handle));
> +	xa_unlock(&pool->va);
>  
>  	return vm;
>  }
Liviu Dudau Nov. 6, 2024, 1:10 p.m. UTC | #4
On Wed, Nov 06, 2024 at 01:16:41PM +0100, Boris Brezillon wrote:
> On Wed,  6 Nov 2024 12:07:48 +0000
> Liviu Dudau <liviu.dudau@arm.com> wrote:
> 
> > Similar to cac075706f29 ("drm/panthor: Fix race when converting
> > group handle to group object") we need to use the XArray's internal
> > locking when retrieving a pointer from there for heap and vm.
> > 
> > Reported-by: Jann Horn <jannh@google.com>
> > Cc: Boris Brezillon <boris.brezillon@collabora.com>
> > Cc: Steven Price <steven.price@arm.com>
> > Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>

From the other email: I will add the Fixes tag for next version.

> > ---
> >  drivers/gpu/drm/panthor/panthor_heap.c | 15 +++++++++++++--
> >  drivers/gpu/drm/panthor/panthor_mmu.c  |  2 ++
> >  2 files changed, 15 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
> > index 3796a9eb22af2..fe0bcb6837f74 100644
> > --- a/drivers/gpu/drm/panthor/panthor_heap.c
> > +++ b/drivers/gpu/drm/panthor/panthor_heap.c
> > @@ -351,6 +351,17 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
> >  	return ret;
> >  }
> >  
> > +static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
> 
> struct pathor_heap_pool does not exist :-).

Oops! Turns out that for my "compile testing" of the patch on my Arm machine I was using
the wrong O= directory so my .config did not had Panthor enabled.

> 
> > +{
> > +	struct panthor_heap *heap;
> > +
> > +	xa_lock(&pool->xa);
> > +	heap = xa_load(&pool->xa, id);
> > +	xa_unlock(&pool->va);
> 
> Access to panthor_heap_pool::xa is protected by the external
> pathor_heap_pool::lock, so taking the xa lock seems redundant here. How
> about adding a lockdep_assert_held(pool->lock) instead?

panthor_heap_pool_release() does not take the panthor_heap_pool::lock, so the protection
is not really there. I could fix panthor_heap_pool_release() and then add a
lockdep_assert_held() before both calls to xa_load() if you think that's a better
solution.

Best regards,
Liviu

> 
> > +
> > +	return heap;
> > +}
> > +
> >  /**
> >   * panthor_heap_return_chunk() - Return an unused heap chunk
> >   * @pool: The pool this heap belongs to.
> > @@ -375,7 +386,7 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
> >  		return -EINVAL;
> >  
> >  	down_read(&pool->lock);
> > -	heap = xa_load(&pool->xa, heap_id);
> > +	heap = panthor_heap_from_id(pool, heap_id);
> >  	if (!heap) {
> >  		ret = -EINVAL;
> >  		goto out_unlock;
> > @@ -438,7 +449,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool,
> >  		return -EINVAL;
> >  
> >  	down_read(&pool->lock);
> > -	heap = xa_load(&pool->xa, heap_id);
> > +	heap = panthor_heap_from_id(pool, heap_id);
> >  	if (!heap) {
> >  		ret = -EINVAL;
> >  		goto out_unlock;
> > diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> > index 8ca85526491e6..8b5cda9d21768 100644
> > --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> > +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> > @@ -1580,7 +1580,9 @@ panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle)
> >  {
> >  	struct panthor_vm *vm;
> >  
> > +	xa_lock(&pool->xa);
> >  	vm = panthor_vm_get(xa_load(&pool->xa, handle));
> > +	xa_unlock(&pool->va);
> >  
> >  	return vm;
> >  }
>
Steven Price Nov. 6, 2024, 1:17 p.m. UTC | #5
On 06/11/2024 12:07, Liviu Dudau wrote:
> Similar to cac075706f29 ("drm/panthor: Fix race when converting
> group handle to group object") we need to use the XArray's internal
> locking when retrieving a pointer from there for heap and vm.
> 
> Reported-by: Jann Horn <jannh@google.com>
> Cc: Boris Brezillon <boris.brezillon@collabora.com>
> Cc: Steven Price <steven.price@arm.com>
> Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
> ---
>  drivers/gpu/drm/panthor/panthor_heap.c | 15 +++++++++++++--
>  drivers/gpu/drm/panthor/panthor_mmu.c  |  2 ++
>  2 files changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
> index 3796a9eb22af2..fe0bcb6837f74 100644
> --- a/drivers/gpu/drm/panthor/panthor_heap.c
> +++ b/drivers/gpu/drm/panthor/panthor_heap.c
> @@ -351,6 +351,17 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
>  	return ret;
>  }
>  
> +static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
> +{
> +	struct panthor_heap *heap;
> +
> +	xa_lock(&pool->xa);
> +	heap = xa_load(&pool->xa, id);
> +	xa_unlock(&pool->va);
> +
> +	return heap;
> +}

This locking doesn't actually achieve anything - XArray already deals
with the concurrency (with RCU), and if we're doing nothing more than an
xa_load() then we don't need (extra) locking (unless using the __
prefixed functions).

And, as Boris has pointed out, pool->lock is held. As you mention in
your email the missing bit might be panthor_heap_pool_release() - if
it's not holding a lock then the heap could be freed immediately after
panthor_heap_from_id() returns (even with the above change).

Steve

> +
>  /**
>   * panthor_heap_return_chunk() - Return an unused heap chunk
>   * @pool: The pool this heap belongs to.
> @@ -375,7 +386,7 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
>  		return -EINVAL;
>  
>  	down_read(&pool->lock);
> -	heap = xa_load(&pool->xa, heap_id);
> +	heap = panthor_heap_from_id(pool, heap_id);
>  	if (!heap) {
>  		ret = -EINVAL;
>  		goto out_unlock;
> @@ -438,7 +449,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool,
>  		return -EINVAL;
>  
>  	down_read(&pool->lock);
> -	heap = xa_load(&pool->xa, heap_id);
> +	heap = panthor_heap_from_id(pool, heap_id);
>  	if (!heap) {
>  		ret = -EINVAL;
>  		goto out_unlock;
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 8ca85526491e6..8b5cda9d21768 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -1580,7 +1580,9 @@ panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle)
>  {
>  	struct panthor_vm *vm;
>  
> +	xa_lock(&pool->xa);
>  	vm = panthor_vm_get(xa_load(&pool->xa, handle));
> +	xa_unlock(&pool->va);
>  
>  	return vm;
>  }
Boris Brezillon Nov. 6, 2024, 1:21 p.m. UTC | #6
On Wed, 6 Nov 2024 13:10:37 +0000
Liviu Dudau <liviu.dudau@arm.com> wrote:

> panthor_heap_pool_release() does not take the panthor_heap_pool::lock, so the protection
> is not really there. I could fix panthor_heap_pool_release() and then add a
> lockdep_assert_held() before both calls to xa_load() if you think that's a better
> solution.

Hm, but panthor_heap_pool_release() doesn't release the heap contexts,
it just calls xa_destroy(). If we have objects remaining in the xarray,
they'll be leaked, but that's not a race. BTW, can we make this two
separate patches. I feel like the thing on the vm is an actual fix,
while the second one (adding a helper with a lockdep_assert()) is
safety net that's worth having, but not necessarily something we need
to backport.
Boris Brezillon Nov. 6, 2024, 1:34 p.m. UTC | #7
On Wed, 6 Nov 2024 13:17:29 +0000
Steven Price <steven.price@arm.com> wrote:

> On 06/11/2024 12:07, Liviu Dudau wrote:
> > Similar to cac075706f29 ("drm/panthor: Fix race when converting
> > group handle to group object") we need to use the XArray's internal
> > locking when retrieving a pointer from there for heap and vm.
> > 
> > Reported-by: Jann Horn <jannh@google.com>
> > Cc: Boris Brezillon <boris.brezillon@collabora.com>
> > Cc: Steven Price <steven.price@arm.com>
> > Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
> > ---
> >  drivers/gpu/drm/panthor/panthor_heap.c | 15 +++++++++++++--
> >  drivers/gpu/drm/panthor/panthor_mmu.c  |  2 ++
> >  2 files changed, 15 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
> > index 3796a9eb22af2..fe0bcb6837f74 100644
> > --- a/drivers/gpu/drm/panthor/panthor_heap.c
> > +++ b/drivers/gpu/drm/panthor/panthor_heap.c
> > @@ -351,6 +351,17 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
> >  	return ret;
> >  }
> >  
> > +static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
> > +{
> > +	struct panthor_heap *heap;
> > +
> > +	xa_lock(&pool->xa);
> > +	heap = xa_load(&pool->xa, id);
> > +	xa_unlock(&pool->va);
> > +
> > +	return heap;
> > +}  
> 
> This locking doesn't actually achieve anything - XArray already deals
> with the concurrency (with RCU), and if we're doing nothing more than an
> xa_load() then we don't need (extra) locking (unless using the __
> prefixed functions).
> 
> And, as Boris has pointed out, pool->lock is held. As you mention in
> your email the missing bit might be panthor_heap_pool_release() - if
> it's not holding a lock then the heap could be freed immediately after
> panthor_heap_from_id() returns (even with the above change).

Hm, if we call panthor_heap_from_id(), that means we have a heap pool to
pass, and incidentally, we're supposed to hold a ref on this pool. So I
don't really see how the heap pool can go away, unless someone messed
up with the refcounting in the meantime.

> 
> Steve
> 
> > +
> >  /**
> >   * panthor_heap_return_chunk() - Return an unused heap chunk
> >   * @pool: The pool this heap belongs to.
> > @@ -375,7 +386,7 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
> >  		return -EINVAL;
> >  
> >  	down_read(&pool->lock);
> > -	heap = xa_load(&pool->xa, heap_id);
> > +	heap = panthor_heap_from_id(pool, heap_id);
> >  	if (!heap) {
> >  		ret = -EINVAL;
> >  		goto out_unlock;
> > @@ -438,7 +449,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool,
> >  		return -EINVAL;
> >  
> >  	down_read(&pool->lock);
> > -	heap = xa_load(&pool->xa, heap_id);
> > +	heap = panthor_heap_from_id(pool, heap_id);
> >  	if (!heap) {
> >  		ret = -EINVAL;
> >  		goto out_unlock;
> > diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> > index 8ca85526491e6..8b5cda9d21768 100644
> > --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> > +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> > @@ -1580,7 +1580,9 @@ panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle)
> >  {
> >  	struct panthor_vm *vm;
> >  
> > +	xa_lock(&pool->xa);
> >  	vm = panthor_vm_get(xa_load(&pool->xa, handle));
> > +	xa_unlock(&pool->va);
> >  
> >  	return vm;
> >  }  
>
Steven Price Nov. 6, 2024, 1:40 p.m. UTC | #8
On 06/11/2024 13:34, Boris Brezillon wrote:
> On Wed, 6 Nov 2024 13:17:29 +0000
> Steven Price <steven.price@arm.com> wrote:
> 
>> On 06/11/2024 12:07, Liviu Dudau wrote:
>>> Similar to cac075706f29 ("drm/panthor: Fix race when converting
>>> group handle to group object") we need to use the XArray's internal
>>> locking when retrieving a pointer from there for heap and vm.
>>>
>>> Reported-by: Jann Horn <jannh@google.com>
>>> Cc: Boris Brezillon <boris.brezillon@collabora.com>
>>> Cc: Steven Price <steven.price@arm.com>
>>> Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
>>> ---
>>>  drivers/gpu/drm/panthor/panthor_heap.c | 15 +++++++++++++--
>>>  drivers/gpu/drm/panthor/panthor_mmu.c  |  2 ++
>>>  2 files changed, 15 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
>>> index 3796a9eb22af2..fe0bcb6837f74 100644
>>> --- a/drivers/gpu/drm/panthor/panthor_heap.c
>>> +++ b/drivers/gpu/drm/panthor/panthor_heap.c
>>> @@ -351,6 +351,17 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
>>>  	return ret;
>>>  }
>>>  
>>> +static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
>>> +{
>>> +	struct panthor_heap *heap;
>>> +
>>> +	xa_lock(&pool->xa);
>>> +	heap = xa_load(&pool->xa, id);
>>> +	xa_unlock(&pool->va);
>>> +
>>> +	return heap;
>>> +}  
>>
>> This locking doesn't actually achieve anything - XArray already deals
>> with the concurrency (with RCU), and if we're doing nothing more than an
>> xa_load() then we don't need (extra) locking (unless using the __
>> prefixed functions).
>>
>> And, as Boris has pointed out, pool->lock is held. As you mention in
>> your email the missing bit might be panthor_heap_pool_release() - if
>> it's not holding a lock then the heap could be freed immediately after
>> panthor_heap_from_id() returns (even with the above change).
> 
> Hm, if we call panthor_heap_from_id(), that means we have a heap pool to
> pass, and incidentally, we're supposed to hold a ref on this pool. So I
> don't really see how the heap pool can go away, unless someone messed
> up with the refcounting in the meantime.

No I'm not sure how it could happen... ;) Hence the "might" - I'd
assumed Liviu had a good reason for thinking there might be a
race/missing ref.

Really it's panthor_heap_destroy_locked() that we need to consider
racing with - as that can remove (and free) an entry from the XArray. It
might be worth putting an lockdep annotation in there to check that the
lock is indeed held. But the code currently looks correct.

Steve

>>
>> Steve
>>
>>> +
>>>  /**
>>>   * panthor_heap_return_chunk() - Return an unused heap chunk
>>>   * @pool: The pool this heap belongs to.
>>> @@ -375,7 +386,7 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
>>>  		return -EINVAL;
>>>  
>>>  	down_read(&pool->lock);
>>> -	heap = xa_load(&pool->xa, heap_id);
>>> +	heap = panthor_heap_from_id(pool, heap_id);
>>>  	if (!heap) {
>>>  		ret = -EINVAL;
>>>  		goto out_unlock;
>>> @@ -438,7 +449,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool,
>>>  		return -EINVAL;
>>>  
>>>  	down_read(&pool->lock);
>>> -	heap = xa_load(&pool->xa, heap_id);
>>> +	heap = panthor_heap_from_id(pool, heap_id);
>>>  	if (!heap) {
>>>  		ret = -EINVAL;
>>>  		goto out_unlock;
>>> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
>>> index 8ca85526491e6..8b5cda9d21768 100644
>>> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
>>> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
>>> @@ -1580,7 +1580,9 @@ panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle)
>>>  {
>>>  	struct panthor_vm *vm;
>>>  
>>> +	xa_lock(&pool->xa);
>>>  	vm = panthor_vm_get(xa_load(&pool->xa, handle));
>>> +	xa_unlock(&pool->va);
>>>  
>>>  	return vm;
>>>  }  
>>
>
Mihail Atanassov Nov. 6, 2024, 2:54 p.m. UTC | #9
On 06/11/2024 12:14, Mihail Atanassov wrote:
> Hi Liviu,
> 
> On 06/11/2024 12:07, Liviu Dudau wrote:
>> Similar to cac075706f29 ("drm/panthor: Fix race when converting
>> group handle to group object") we need to use the XArray's internal
>> locking when retrieving a pointer from there for heap and vm.
>>
>> Reported-by: Jann Horn <jannh@google.com>
>> Cc: Boris Brezillon <boris.brezillon@collabora.com>
>> Cc: Steven Price <steven.price@arm.com>
>> Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
>> ---
>>   drivers/gpu/drm/panthor/panthor_heap.c | 15 +++++++++++++--
>>   drivers/gpu/drm/panthor/panthor_mmu.c  |  2 ++
>>   2 files changed, 15 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/ 
>> panthor/panthor_heap.c
>> index 3796a9eb22af2..fe0bcb6837f74 100644
>> --- a/drivers/gpu/drm/panthor/panthor_heap.c
>> +++ b/drivers/gpu/drm/panthor/panthor_heap.c
>> @@ -351,6 +351,17 @@ int panthor_heap_create(struct panthor_heap_pool 
>> *pool,
>>       return ret;
>>   }
>> +static struct panthor_heap *panthor_heap_from_id(struct 
>> pathor_heap_pool *pool, u32 id)

s/pathor/panthor/, but you already know that.

>> +{
>> +    struct panthor_heap *heap;
>> +
>> +    xa_lock(&pool->xa);
>> +    heap = xa_load(&pool->xa, id);
>> +    xa_unlock(&pool->va);

s/va/xa

>> +
>> +    return heap;
>> +}
>> +
>>   /**
>>    * panthor_heap_return_chunk() - Return an unused heap chunk
>>    * @pool: The pool this heap belongs to.
>> @@ -375,7 +386,7 @@ int panthor_heap_return_chunk(struct 
>> panthor_heap_pool *pool,
>>           return -EINVAL;
>>       down_read(&pool->lock);
>> -    heap = xa_load(&pool->xa, heap_id);
>> +    heap = panthor_heap_from_id(pool, heap_id);
>>       if (!heap) {
>>           ret = -EINVAL;
>>           goto out_unlock;
>> @@ -438,7 +449,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool,
>>           return -EINVAL;
>>       down_read(&pool->lock);
>> -    heap = xa_load(&pool->xa, heap_id);
>> +    heap = panthor_heap_from_id(pool, heap_id);
>>       if (!heap) {
>>           ret = -EINVAL;
>>           goto out_unlock;
>> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/ 
>> panthor/panthor_mmu.c
>> index 8ca85526491e6..8b5cda9d21768 100644
>> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
>> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
>> @@ -1580,7 +1580,9 @@ panthor_vm_pool_get_vm(struct panthor_vm_pool 
>> *pool, u32 handle)
>>   {
>>       struct panthor_vm *vm;
>> +    xa_lock(&pool->xa);
>>       vm = panthor_vm_get(xa_load(&pool->xa, handle));
>> +    xa_unlock(&pool->va);

s/va/xa/

>>       return vm;
>>   }
> 
> Reviewed-by: Mihail Atanassov <mihail.atanassov@arm.com>

Lesson learned for me -- at least build-test what you review :).

With the typos fixed up so this patch builds, I can't observe the race 
in `panthor_vm_pool_get_vm` any more.

The other comments on this patch notwithstanding,

Tested-by: Mihail Atanassov <mihail.atanassov@arm.com>

>
kernel test robot Nov. 6, 2024, 5:32 p.m. UTC | #10
Hi Liviu,

kernel test robot noticed the following build errors:

[auto build test ERROR on drm-misc/drm-misc-next]
[also build test ERROR on linus/master v6.12-rc6 next-20241106]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Liviu-Dudau/drm-panthor-Lock-XArray-when-getting-entries-for-heap-and-VM/20241106-200841
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20241106120748.290697-1-liviu.dudau%40arm.com
patch subject: [PATCH] drm/panthor: Lock XArray when getting entries for heap and VM
config: arc-allyesconfig (https://download.01.org/0day-ci/archive/20241107/202411070140.L4JAwkvX-lkp@intel.com/config)
compiler: arceb-elf-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241107/202411070140.L4JAwkvX-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202411070140.L4JAwkvX-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

>> drivers/gpu/drm/panthor/panthor_heap.c:354:57: warning: 'struct pathor_heap_pool' declared inside parameter list will not be visible outside of this definition or declaration
     354 | static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
         |                                                         ^~~~~~~~~~~~~~~~
   In file included from include/linux/list_lru.h:14,
                    from include/linux/fs.h:13,
                    from include/linux/huge_mm.h:8,
                    from include/linux/mm.h:1120,
                    from include/linux/scatterlist.h:8,
                    from include/linux/iommu.h:10,
                    from include/linux/io-pgtable.h:6,
                    from drivers/gpu/drm/panthor/panthor_device.h:10,
                    from drivers/gpu/drm/panthor/panthor_heap.c:9:
   drivers/gpu/drm/panthor/panthor_heap.c: In function 'panthor_heap_from_id':
>> drivers/gpu/drm/panthor/panthor_heap.c:358:22: error: invalid use of undefined type 'struct pathor_heap_pool'
     358 |         xa_lock(&pool->xa);
         |                      ^~
   include/linux/xarray.h:536:45: note: in definition of macro 'xa_lock'
     536 | #define xa_lock(xa)             spin_lock(&(xa)->xa_lock)
         |                                             ^~
   drivers/gpu/drm/panthor/panthor_heap.c:359:29: error: invalid use of undefined type 'struct pathor_heap_pool'
     359 |         heap = xa_load(&pool->xa, id);
         |                             ^~
   drivers/gpu/drm/panthor/panthor_heap.c:360:24: error: invalid use of undefined type 'struct pathor_heap_pool'
     360 |         xa_unlock(&pool->va);
         |                        ^~
   include/linux/xarray.h:537:47: note: in definition of macro 'xa_unlock'
     537 | #define xa_unlock(xa)           spin_unlock(&(xa)->xa_lock)
         |                                               ^~
   drivers/gpu/drm/panthor/panthor_heap.c: In function 'panthor_heap_return_chunk':
>> drivers/gpu/drm/panthor/panthor_heap.c:389:37: error: passing argument 1 of 'panthor_heap_from_id' from incompatible pointer type [-Werror=incompatible-pointer-types]
     389 |         heap = panthor_heap_from_id(pool, heap_id);
         |                                     ^~~~
         |                                     |
         |                                     struct panthor_heap_pool *
   drivers/gpu/drm/panthor/panthor_heap.c:354:75: note: expected 'struct pathor_heap_pool *' but argument is of type 'struct panthor_heap_pool *'
     354 | static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
         |                                                  ~~~~~~~~~~~~~~~~~~~~~~~~~^~~~
   drivers/gpu/drm/panthor/panthor_heap.c: In function 'panthor_heap_grow':
   drivers/gpu/drm/panthor/panthor_heap.c:452:37: error: passing argument 1 of 'panthor_heap_from_id' from incompatible pointer type [-Werror=incompatible-pointer-types]
     452 |         heap = panthor_heap_from_id(pool, heap_id);
         |                                     ^~~~
         |                                     |
         |                                     struct panthor_heap_pool *
   drivers/gpu/drm/panthor/panthor_heap.c:354:75: note: expected 'struct pathor_heap_pool *' but argument is of type 'struct panthor_heap_pool *'
     354 | static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
         |                                                  ~~~~~~~~~~~~~~~~~~~~~~~~~^~~~
   cc1: some warnings being treated as errors
--
   In file included from include/linux/list_lru.h:14,
                    from include/linux/fs.h:13,
                    from include/linux/seq_file.h:11,
                    from include/drm/drm_debugfs.h:36,
                    from drivers/gpu/drm/panthor/panthor_mmu.c:5:
   drivers/gpu/drm/panthor/panthor_mmu.c: In function 'panthor_vm_pool_get_vm':
>> drivers/gpu/drm/panthor/panthor_mmu.c:1585:26: error: 'struct panthor_vm_pool' has no member named 'va'; did you mean 'xa'?
    1585 |         xa_unlock(&pool->va);
         |                          ^~
   include/linux/xarray.h:537:47: note: in definition of macro 'xa_unlock'
     537 | #define xa_unlock(xa)           spin_unlock(&(xa)->xa_lock)
         |                                               ^~


vim +358 drivers/gpu/drm/panthor/panthor_heap.c

   353	
 > 354	static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
   355	{
   356		struct panthor_heap *heap;
   357	
 > 358		xa_lock(&pool->xa);
   359		heap = xa_load(&pool->xa, id);
   360		xa_unlock(&pool->va);
   361	
   362		return heap;
   363	}
   364	
   365	/**
   366	 * panthor_heap_return_chunk() - Return an unused heap chunk
   367	 * @pool: The pool this heap belongs to.
   368	 * @heap_gpu_va: The GPU address of the heap context.
   369	 * @chunk_gpu_va: The chunk VA to return.
   370	 *
   371	 * This function is used when a chunk allocated with panthor_heap_grow()
   372	 * couldn't be linked to the heap context through the FW interface because
   373	 * the group requesting the allocation was scheduled out in the meantime.
   374	 */
   375	int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
   376				      u64 heap_gpu_va,
   377				      u64 chunk_gpu_va)
   378	{
   379		u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
   380		u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
   381		struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
   382		struct panthor_heap *heap;
   383		int ret;
   384	
   385		if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
   386			return -EINVAL;
   387	
   388		down_read(&pool->lock);
 > 389		heap = panthor_heap_from_id(pool, heap_id);
   390		if (!heap) {
   391			ret = -EINVAL;
   392			goto out_unlock;
   393		}
   394	
   395		chunk_gpu_va &= GENMASK_ULL(63, 12);
   396	
   397		mutex_lock(&heap->lock);
   398		list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
   399			if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
   400				removed = chunk;
   401				list_del(&chunk->node);
   402				heap->chunk_count--;
   403				break;
   404			}
   405		}
   406		mutex_unlock(&heap->lock);
   407	
   408		if (removed) {
   409			panthor_kernel_bo_destroy(chunk->bo);
   410			kfree(chunk);
   411			ret = 0;
   412		} else {
   413			ret = -EINVAL;
   414		}
   415	
   416	out_unlock:
   417		up_read(&pool->lock);
   418		return ret;
   419	}
   420
kernel test robot Nov. 6, 2024, 5:53 p.m. UTC | #11
Hi Liviu,

kernel test robot noticed the following build errors:

[auto build test ERROR on drm-misc/drm-misc-next]
[also build test ERROR on linus/master v6.12-rc6 next-20241106]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Liviu-Dudau/drm-panthor-Lock-XArray-when-getting-entries-for-heap-and-VM/20241106-200841
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20241106120748.290697-1-liviu.dudau%40arm.com
patch subject: [PATCH] drm/panthor: Lock XArray when getting entries for heap and VM
config: x86_64-buildonly-randconfig-002-20241106 (https://download.01.org/0day-ci/archive/20241107/202411070115.YPuBa5QX-lkp@intel.com/config)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241107/202411070115.YPuBa5QX-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202411070115.YPuBa5QX-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from drivers/gpu/drm/panthor/panthor_heap.c:9:
   In file included from drivers/gpu/drm/panthor/panthor_device.h:10:
   In file included from include/linux/io-pgtable.h:6:
   In file included from include/linux/iommu.h:10:
   In file included from include/linux/scatterlist.h:8:
   In file included from include/linux/mm.h:2213:
   include/linux/vmstat.h:504:43: error: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Werror,-Wenum-enum-conversion]
     504 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     505 |                            item];
         |                            ~~~~
   include/linux/vmstat.h:511:43: error: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Werror,-Wenum-enum-conversion]
     511 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     512 |                            NR_VM_NUMA_EVENT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~~
   include/linux/vmstat.h:518:36: error: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Werror,-Wenum-enum-conversion]
     518 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   include/linux/vmstat.h:524:43: error: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Werror,-Wenum-enum-conversion]
     524 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     525 |                            NR_VM_NUMA_EVENT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~~
>> drivers/gpu/drm/panthor/panthor_heap.c:354:57: error: declaration of 'struct pathor_heap_pool' will not be visible outside of this function [-Werror,-Wvisibility]
     354 | static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
         |                                                         ^
   drivers/gpu/drm/panthor/panthor_heap.c:358:15: error: incomplete definition of type 'struct pathor_heap_pool'
     358 |         xa_lock(&pool->xa);
         |                  ~~~~^
   include/linux/xarray.h:536:34: note: expanded from macro 'xa_lock'
     536 | #define xa_lock(xa)             spin_lock(&(xa)->xa_lock)
         |                                             ^~
   drivers/gpu/drm/panthor/panthor_heap.c:354:57: note: forward declaration of 'struct pathor_heap_pool'
     354 | static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
         |                                                         ^
   drivers/gpu/drm/panthor/panthor_heap.c:359:22: error: incomplete definition of type 'struct pathor_heap_pool'
     359 |         heap = xa_load(&pool->xa, id);
         |                         ~~~~^
   drivers/gpu/drm/panthor/panthor_heap.c:354:57: note: forward declaration of 'struct pathor_heap_pool'
     354 | static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
         |                                                         ^
   drivers/gpu/drm/panthor/panthor_heap.c:360:17: error: incomplete definition of type 'struct pathor_heap_pool'
     360 |         xa_unlock(&pool->va);
         |                    ~~~~^
   include/linux/xarray.h:537:38: note: expanded from macro 'xa_unlock'
     537 | #define xa_unlock(xa)           spin_unlock(&(xa)->xa_lock)
         |                                               ^~
   drivers/gpu/drm/panthor/panthor_heap.c:354:57: note: forward declaration of 'struct pathor_heap_pool'
     354 | static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
         |                                                         ^
   drivers/gpu/drm/panthor/panthor_heap.c:389:30: error: incompatible pointer types passing 'struct panthor_heap_pool *' to parameter of type 'struct pathor_heap_pool *' [-Werror,-Wincompatible-pointer-types]
     389 |         heap = panthor_heap_from_id(pool, heap_id);
         |                                     ^~~~
   drivers/gpu/drm/panthor/panthor_heap.c:354:75: note: passing argument to parameter 'pool' here
     354 | static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
         |                                                                           ^
   drivers/gpu/drm/panthor/panthor_heap.c:452:30: error: incompatible pointer types passing 'struct panthor_heap_pool *' to parameter of type 'struct pathor_heap_pool *' [-Werror,-Wincompatible-pointer-types]
     452 |         heap = panthor_heap_from_id(pool, heap_id);
         |                                     ^~~~
   drivers/gpu/drm/panthor/panthor_heap.c:354:75: note: passing argument to parameter 'pool' here
     354 | static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
         |                                                                           ^
   10 errors generated.


vim +354 drivers/gpu/drm/panthor/panthor_heap.c

   353	
 > 354	static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
   355	{
   356		struct panthor_heap *heap;
   357	
   358		xa_lock(&pool->xa);
   359		heap = xa_load(&pool->xa, id);
   360		xa_unlock(&pool->va);
   361	
   362		return heap;
   363	}
   364
Liviu Dudau Nov. 6, 2024, 6:59 p.m. UTC | #12
On Wed, Nov 06, 2024 at 02:21:33PM +0100, Boris Brezillon wrote:
> On Wed, 6 Nov 2024 13:10:37 +0000
> Liviu Dudau <liviu.dudau@arm.com> wrote:
> 
> > panthor_heap_pool_release() does not take the panthor_heap_pool::lock, so the protection
> > is not really there. I could fix panthor_heap_pool_release() and then add a
> > lockdep_assert_held() before both calls to xa_load() if you think that's a better
> > solution.
> 
> Hm, but panthor_heap_pool_release() doesn't release the heap contexts,
> it just calls xa_destroy(). If we have objects remaining in the xarray,
> they'll be leaked, but that's not a race. BTW, can we make this two
> separate patches. I feel like the thing on the vm is an actual fix,
> while the second one (adding a helper with a lockdep_assert()) is
> safety net that's worth having, but not necessarily something we need
> to backport.

I've decided to drop the panthor_heap.c changes as Boris is right, the pool->lock
should be enough. Adding a lockdep_assert() right after the down_write() call
also feels a bit silly, so I did not bother with it.

Best regards,
Liviu
diff mbox series

Patch

diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
index 3796a9eb22af2..fe0bcb6837f74 100644
--- a/drivers/gpu/drm/panthor/panthor_heap.c
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -351,6 +351,17 @@  int panthor_heap_create(struct panthor_heap_pool *pool,
 	return ret;
 }
 
+static struct panthor_heap *panthor_heap_from_id(struct pathor_heap_pool *pool, u32 id)
+{
+	struct panthor_heap *heap;
+
+	xa_lock(&pool->xa);
+	heap = xa_load(&pool->xa, id);
+	xa_unlock(&pool->va);
+
+	return heap;
+}
+
 /**
  * panthor_heap_return_chunk() - Return an unused heap chunk
  * @pool: The pool this heap belongs to.
@@ -375,7 +386,7 @@  int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
 		return -EINVAL;
 
 	down_read(&pool->lock);
-	heap = xa_load(&pool->xa, heap_id);
+	heap = panthor_heap_from_id(pool, heap_id);
 	if (!heap) {
 		ret = -EINVAL;
 		goto out_unlock;
@@ -438,7 +449,7 @@  int panthor_heap_grow(struct panthor_heap_pool *pool,
 		return -EINVAL;
 
 	down_read(&pool->lock);
-	heap = xa_load(&pool->xa, heap_id);
+	heap = panthor_heap_from_id(pool, heap_id);
 	if (!heap) {
 		ret = -EINVAL;
 		goto out_unlock;
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 8ca85526491e6..8b5cda9d21768 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -1580,7 +1580,9 @@  panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle)
 {
 	struct panthor_vm *vm;
 
+	xa_lock(&pool->xa);
 	vm = panthor_vm_get(xa_load(&pool->xa, handle));
+	xa_unlock(&pool->va);
 
 	return vm;
 }