diff mbox

drm/radeon: don't leave processes blocked on fences after a failed GPU reset

Message ID 1355758146-7095-1-git-send-email-j.glisse@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jerome Glisse Dec. 17, 2012, 3:29 p.m. UTC
From: Jerome Glisse <jglisse@redhat.com>

Force all fences to signal if the GPU reset failed, so that no process gets
stuck waiting on a fence.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 drivers/gpu/drm/radeon/radeon.h        |  1 +
 drivers/gpu/drm/radeon/radeon_device.c |  1 +
 drivers/gpu/drm/radeon/radeon_fence.c  | 19 +++++++++++++++++++
 3 files changed, 21 insertions(+)

Comments

Christian König Dec. 17, 2012, 3:46 p.m. UTC | #1
On 17.12.2012 16:29, j.glisse@gmail.com wrote:
> From: Jerome Glisse <jglisse@redhat.com>
>
> Force all fences to signal if the GPU reset failed, so that no process gets
> stuck waiting on a fence.
>
> Signed-off-by: Jerome Glisse <jglisse@redhat.com>

Seems to make sense.

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/radeon/radeon.h        |  1 +
>   drivers/gpu/drm/radeon/radeon_device.c |  1 +
>   drivers/gpu/drm/radeon/radeon_fence.c  | 19 +++++++++++++++++++
>   3 files changed, 21 insertions(+)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 5d68346..9c7625c 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -225,6 +225,7 @@ struct radeon_fence {
>   int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
>   int radeon_fence_driver_init(struct radeon_device *rdev);
>   void radeon_fence_driver_fini(struct radeon_device *rdev);
> +void radeon_fence_driver_force_completion(struct radeon_device *rdev);
>   int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
>   void radeon_fence_process(struct radeon_device *rdev, int ring);
>   bool radeon_fence_signaled(struct radeon_fence *fence);
> diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
> index e2f5f88..774fae7 100644
> --- a/drivers/gpu/drm/radeon/radeon_device.c
> +++ b/drivers/gpu/drm/radeon/radeon_device.c
> @@ -1357,6 +1357,7 @@ retry:
>   			}
>   		}
>   	} else {
> +		radeon_fence_driver_force_completion(rdev);
>   		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>   			kfree(ring_data[i]);
>   		}
> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
> index 22bd6c2..bf7b20e 100644
> --- a/drivers/gpu/drm/radeon/radeon_fence.c
> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
> @@ -868,6 +868,25 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
>   	mutex_unlock(&rdev->ring_lock);
>   }
>   
> +/**
> + * radeon_fence_driver_force_completion - force all fence waiters to complete
> + *
> + * @rdev: radeon device pointer
> + *
> + * In case of GPU reset failure, make sure no process keeps waiting on a fence
> + * that will never complete.
> + */
> +void radeon_fence_driver_force_completion(struct radeon_device *rdev)
> +{
> +	int ring;
> +
> +	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
> +		if (!rdev->fence_drv[ring].initialized)
> +			continue;
> +		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
> +	}
> +}
> +
>   
>   /*
>    * Fence debugfs
Alex Deucher Dec. 17, 2012, 9:39 p.m. UTC | #2
Applied to my fixes branch.  Thanks!

2012/12/17 Christian König <deathsimple@vodafone.de>:
> Reviewed-by: Christian König <christian.koenig@amd.com>
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5d68346..9c7625c 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -225,6 +225,7 @@  struct radeon_fence {
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
 int radeon_fence_driver_init(struct radeon_device *rdev);
 void radeon_fence_driver_fini(struct radeon_device *rdev);
+void radeon_fence_driver_force_completion(struct radeon_device *rdev);
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
 void radeon_fence_process(struct radeon_device *rdev, int ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e2f5f88..774fae7 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1357,6 +1357,7 @@  retry:
 			}
 		}
 	} else {
+		radeon_fence_driver_force_completion(rdev);
 		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 			kfree(ring_data[i]);
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 22bd6c2..bf7b20e 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -868,6 +868,25 @@  void radeon_fence_driver_fini(struct radeon_device *rdev)
 	mutex_unlock(&rdev->ring_lock);
 }
 
+/**
+ * radeon_fence_driver_force_completion - force all fence waiters to complete
+ *
+ * @rdev: radeon device pointer
+ *
+ * In case of GPU reset failure, make sure no process keeps waiting on a fence
+ * that will never complete.
+ */
+void radeon_fence_driver_force_completion(struct radeon_device *rdev)
+{
+	int ring;
+
+	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
+		if (!rdev->fence_drv[ring].initialized)
+			continue;
+		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
+	}
+}
+
 
 /*
  * Fence debugfs