
[1/3] drm/ttm: Clear the buffer object bulk move at individualization time

Message ID 20230525150205.194098-2-thomas.hellstrom@linux.intel.com (mailing list archive)
State New, archived
Series drm/ttm: Reservation object individualization update

Commit Message

Thomas Hellstrom May 25, 2023, 3:02 p.m. UTC
Clearing the buffer object bulk move is closely tied to individualizing
the resv, since that is when we effectively detach the bo from a vm.

Clearing the bulk move also requires the bo resv, which we have readily
locked at individualization time, without clobbering the much wider vm
lock.

So clear the buffer object bulk_move at individualization time, and
update the code comments.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

Comments

Christian König June 1, 2023, 10:38 a.m. UTC | #1
Am 25.05.23 um 17:02 schrieb Thomas Hellström:
> Clearing the buffer object bulk move is closely tied to individualizing
> the resv, since that is when we effectively detach the bo from a vm.
>
> Clearing the bulk move also requires the bo resv, which we have readily
> locked at individualization time, without clobbering the much wider vm
> lock.
>
> So clear the buffer object bulk_move at individualization time, and
> update the code comments.

WOW, there are some big misunderstandings here. First of all, the
assumption that the reservation lock is taken at individualization time
is completely incorrect.

Instead, this is called during release with the reservation lock in an
unknown state: it can be locked or not, depending on who is dropping the
last reference.

Then when you use a bulk move, the area covered by the bulk is protected
by the common reservation lock. So when a BO is to be removed from the
bulk, it must be removed from the bulk *before* dropping the last
reference.

Regards,
Christian.
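
[Editor's note: to illustrate the ordering Christian describes, here is a
minimal driver-side sketch, not code from this series. The function name
example_vm_unbind_bo is made up, and it assumes the bo shares the vm's
reservation object, so locking the vm resv also locks the bo resv. The bo
is detached from the bulk under that common lock *before* the final
reference is dropped.]

#include <drm/ttm/ttm_bo.h>
#include <linux/dma-resv.h>

static void example_vm_unbind_bo(struct ttm_buffer_object *bo,
				 struct dma_resv *vm_resv)
{
	/* Hypothetical setup: bo->base.resv == vm_resv, so this also
	 * satisfies the locking requirement of ttm_bo_set_bulk_move().
	 */
	dma_resv_lock(vm_resv, NULL);
	ttm_bo_set_bulk_move(bo, NULL);	/* detach from the vm's bulk move */
	dma_resv_unlock(vm_resv);

	/* Only now is it safe to drop what may be the last reference. */
	ttm_bo_put(bo);
}

In a real driver this would typically sit in the vm unbind/teardown path,
where the shared vm resv is naturally held anyway.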

>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 30 ++++++++++++++++++++++--------
>   1 file changed, 22 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index bd5dae4d1624..57cc9f845adc 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -193,20 +193,33 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>   	BUG_ON(!dma_resv_trylock(&bo->base._resv));
>   
>   	r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
> -	dma_resv_unlock(&bo->base._resv);
> -	if (r)
> -		return r;
>   
> -	if (bo->type != ttm_bo_type_sg) {
> -		/* This works because the BO is about to be destroyed and nobody
> -		 * reference it any more. The only tricky case is the trylock on
> -		 * the resv object while holding the lru_lock.
> +	if (!r && bo->type != ttm_bo_type_sg) {
> +		/*
> +		 * The TTM bo refcount is now zero and hence nobody will
> +		 * try to lock the bo at this point: the LRU list lookups
> +		 * will trylock even if the refcount is zero, but will
> +		 * only do that under the LRU lock and will then
> +		 * immediately back off under the same LRU lock when they
> +		 * see the zero refcount.
>   		 */
>   		spin_lock(&bo->bdev->lru_lock);
>   		bo->base.resv = &bo->base._resv;
> +
> +		/* Since the bulk move is closely tied to the shared resv,
> +		 * clear it now that we have individualized, if the driver
> +		 * has not already done so.
> +		 */
> +		if (bo->bulk_move) {
> +			if (bo->resource)
> +				ttm_resource_del_bulk_move(bo->resource, bo);
> +			bo->bulk_move = NULL;
> +		}
>   		spin_unlock(&bo->bdev->lru_lock);
>   	}
>   
> +	dma_resv_unlock(&bo->base._resv);
> +
>   	return r;
>   }
>   
> @@ -324,7 +337,6 @@ static void ttm_bo_release(struct kref *kref)
>   	int ret;
>   
>   	WARN_ON_ONCE(bo->pin_count);
> -	WARN_ON_ONCE(bo->bulk_move);
>   
>   	if (!bo->deleted) {
>   		ret = ttm_bo_individualize_resv(bo);
> @@ -337,6 +349,8 @@ static void ttm_bo_release(struct kref *kref)
>   					      30 * HZ);
>   		}
>   
> +		WARN_ON_ONCE(bo->bulk_move);
> +
>   		if (bo->bdev->funcs->release_notify)
>   			bo->bdev->funcs->release_notify(bo);
>

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index bd5dae4d1624..57cc9f845adc 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -193,20 +193,33 @@  static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
 	BUG_ON(!dma_resv_trylock(&bo->base._resv));
 
 	r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
-	dma_resv_unlock(&bo->base._resv);
-	if (r)
-		return r;
 
-	if (bo->type != ttm_bo_type_sg) {
-		/* This works because the BO is about to be destroyed and nobody
-		 * reference it any more. The only tricky case is the trylock on
-		 * the resv object while holding the lru_lock.
+	if (!r && bo->type != ttm_bo_type_sg) {
+		/*
+		 * The TTM bo refcount is now zero and hence nobody will
+		 * try to lock the bo at this point: the LRU list lookups
+		 * will trylock even if the refcount is zero, but will
+		 * only do that under the LRU lock and will then
+		 * immediately back off under the same LRU lock when they
+		 * see the zero refcount.
 		 */
 		spin_lock(&bo->bdev->lru_lock);
 		bo->base.resv = &bo->base._resv;
+
+		/* Since the bulk move is closely tied to the shared resv,
+		 * clear it now that we have individualized, if the driver
+		 * has not already done so.
+		 */
+		if (bo->bulk_move) {
+			if (bo->resource)
+				ttm_resource_del_bulk_move(bo->resource, bo);
+			bo->bulk_move = NULL;
+		}
 		spin_unlock(&bo->bdev->lru_lock);
 	}
 
+	dma_resv_unlock(&bo->base._resv);
+
 	return r;
 }
 
@@ -324,7 +337,6 @@  static void ttm_bo_release(struct kref *kref)
 	int ret;
 
 	WARN_ON_ONCE(bo->pin_count);
-	WARN_ON_ONCE(bo->bulk_move);
 
 	if (!bo->deleted) {
 		ret = ttm_bo_individualize_resv(bo);
@@ -337,6 +349,8 @@  static void ttm_bo_release(struct kref *kref)
 					      30 * HZ);
 		}
 
+		WARN_ON_ONCE(bo->bulk_move);
+
 		if (bo->bdev->funcs->release_notify)
 			bo->bdev->funcs->release_notify(bo);
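
[Editor's note: the trylock-and-back-off behaviour the new comment relies
on can be pictured with a small, hypothetical LRU-walk helper. This is not
the actual TTM code and lru_walk_try_bo is a made-up name: lookups trylock
the resv under the LRU spinlock and back off again once they see a zero
refcount.]

#include <drm/ttm/ttm_bo.h>
#include <linux/dma-resv.h>
#include <linux/kref.h>

/* Caller holds bo->bdev->lru_lock. Returns true if the bo was locked and
 * a reference was taken, false if the walker has to skip it.
 */
static bool lru_walk_try_bo(struct ttm_buffer_object *bo)
{
	if (!dma_resv_trylock(bo->base.resv))
		return false;		/* resv contended, skip this bo */

	if (!kref_get_unless_zero(&bo->kref)) {
		/* Refcount already zero: the bo is being released (and,
		 * with this patch, possibly individualized), so back off
		 * immediately.
		 */
		dma_resv_unlock(bo->base.resv);
		return false;
	}

	return true;
}

The point relevant to this patch is that once the refcount hits zero the
resv pointer may be swapped for the individualized one, which is why that
swap, and now the bulk_move clearing, happen under the same lru_lock the
walker holds.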