diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -349,6 +349,7 @@ extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw,
* Fences.
*/
struct radeon_fence_driver {
+ struct radeon_device *rdev;
uint32_t scratch_reg;
uint64_t gpu_addr;
volatile uint32_t *cpu_addr;
@@ -356,6 +357,7 @@ struct radeon_fence_driver {
uint64_t sync_seq[RADEON_NUM_RINGS];
atomic64_t last_seq;
bool initialized;
+ struct delayed_work fence_check_work;
};
struct radeon_fence {
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -97,6 +97,20 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
return seq;
}
+static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
+{
+ if (!atomic_read(&rdev->irq.ring_int[ring]))
+ return;
+
+ /*
+ * Do not reset the timer here with mod_delayed_work,
+ * this can livelock in an interaction with TTM delayed destroy.
+ */
+ queue_delayed_work(system_power_efficient_wq,
+ &rdev->fence_drv[ring].fence_check_work,
+ RADEON_FENCE_JIFFIES_TIMEOUT);
+}
+
/**
* radeon_fence_emit - emit a fence on the requested ring
*
@@ -122,19 +136,23 @@ int radeon_fence_emit(struct radeon_device *rdev,
(*fence)->ring = ring;
radeon_fence_ring_emit(rdev, ring, *fence);
trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
+ radeon_fence_schedule_check(rdev, ring);
return 0;
}
/**
- * radeon_fence_process - process a fence
+ * radeon_fence_process_nowake - process a fence without waking up the fence queue
*
* @rdev: radeon_device pointer
* @ring: ring index the fence is associated with
*
* Checks the current fence value and wakes the fence queue
* if the sequence number has increased (all asics).
+ *
+ * Returns true if activity occurred on the ring and the fence_queue should
+ * be woken up.
*/
-void radeon_fence_process(struct radeon_device *rdev, int ring)
+static bool radeon_fence_process_nowake(struct radeon_device *rdev, int ring)
{
uint64_t seq, last_seq, last_emitted;
unsigned count_loop = 0;
@@ -190,7 +208,51 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
}
} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
- if (wake)
+ if (seq < last_emitted)
+ radeon_fence_schedule_check(rdev, ring);
+
+ return wake;
+}
+
+static void radeon_fence_driver_check_lockup(struct work_struct *work)
+{
+ struct radeon_fence_driver *fence_drv;
+ struct radeon_device *rdev;
+ unsigned long iring;
+
+ fence_drv = container_of(work, struct radeon_fence_driver, fence_check_work.work);
+ rdev = fence_drv->rdev;
+ iring = fence_drv - &rdev->fence_drv[0];
+
+ down_read(&rdev->exclusive_lock);
+ if (radeon_fence_process_nowake(rdev, iring))
+ wake_up_all(&rdev->fence_queue);
+ else if (radeon_ring_is_lockup(rdev, iring, &rdev->ring[iring])) {
+ /* good news we believe it's a lockup */
+ dev_warn(rdev->dev, "GPU lockup (current fence id "
+ "0x%016llx last fence id 0x%016llx on ring %ld)\n",
+ (uint64_t)atomic64_read(&fence_drv->last_seq),
+ fence_drv->sync_seq[iring], iring);
+
+ /* remember that we need a reset */
+ rdev->needs_reset = true;
+ wake_up_all(&rdev->fence_queue);
+ }
+ up_read(&rdev->exclusive_lock);
+}
+
+/**
+ * radeon_fence_process - process a fence
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring index the fence is associated with
+ *
+ * Checks the current fence value and wakes the fence queue
+ * if the sequence number has increased (all asics).
+ */
+void radeon_fence_process(struct radeon_device *rdev, int ring)
+{
+ if (radeon_fence_process_nowake(rdev, ring))
wake_up_all(&rdev->fence_queue);
}
@@ -302,84 +364,52 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
{
uint64_t last_seq[RADEON_NUM_RINGS];
bool signaled;
- int i, r;
+ long r;
+ int i;
- while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
+ signaled = radeon_fence_any_seq_signaled(rdev, target_seq);
+ if (signaled)
+ return 0;
- /* Save current sequence values, used to check for GPU lockups */
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
- if (!target_seq[i])
- continue;
+ /* Save current sequence values, used to check for GPU lockups */
+ for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ if (!target_seq[i])
+ continue;
- last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq);
- trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
- radeon_irq_kms_sw_irq_get(rdev, i);
- }
+ last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq);
+ trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
+ radeon_irq_kms_sw_irq_get(rdev, i);
+ }
+ while (!signaled) {
if (intr) {
r = wait_event_interruptible_timeout(rdev->fence_queue, (
(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
- || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
+ || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT);
} else {
r = wait_event_timeout(rdev->fence_queue, (
(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
- || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
+ || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT);
}
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
- if (!target_seq[i])
- continue;
+ if (r < 0)
+ break;
- radeon_irq_kms_sw_irq_put(rdev, i);
- trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
+ if (rdev->needs_reset) {
+ r = -EDEADLK;
+ break;
}
+ }
- if (unlikely(r < 0))
- return r;
+ for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ if (!target_seq[i])
+ continue;
- if (unlikely(!signaled)) {
- if (rdev->needs_reset)
- return -EDEADLK;
-
- /* we were interrupted for some reason and fence
- * isn't signaled yet, resume waiting */
- if (r)
- continue;
-
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
- if (!target_seq[i])
- continue;
-
- if (last_seq[i] != atomic64_read(&rdev->fence_drv[i].last_seq))
- break;
- }
-
- if (i != RADEON_NUM_RINGS)
- continue;
-
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
- if (!target_seq[i])
- continue;
-
- if (radeon_ring_is_lockup(rdev, i, &rdev->ring[i]))
- break;
- }
-
- if (i < RADEON_NUM_RINGS) {
- /* good news we believe it's a lockup */
- dev_warn(rdev->dev, "GPU lockup (waiting for "
- "0x%016llx last fence id 0x%016llx on"
- " ring %d)\n",
- target_seq[i], last_seq[i], i);
-
- /* remember that we need an reset */
- rdev->needs_reset = true;
- wake_up_all(&rdev->fence_queue);
- return -EDEADLK;
- }
- }
+ radeon_irq_kms_sw_irq_put(rdev, i);
+ trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
}
- return 0;
+
+ return r < 0 ? r : 0;
}
/**
@@ -711,6 +741,9 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
rdev->fence_drv[ring].sync_seq[i] = 0;
atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
rdev->fence_drv[ring].initialized = false;
+ INIT_DELAYED_WORK(&rdev->fence_drv[ring].fence_check_work,
+ radeon_fence_driver_check_lockup);
+ rdev->fence_drv[ring].rdev = rdev;
}
/**
@@ -760,6 +793,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
/* no need to trigger GPU reset as we are unloading */
radeon_fence_driver_force_completion(rdev);
}
+ cancel_delayed_work_sync(&rdev->fence_drv[ring].fence_check_work);
wake_up_all(&rdev->fence_queue);
radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
rdev->fence_drv[ring].initialized = false;
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
V1 had a nasty bug that broke GPU lockup recovery. The fix was to not allow
radeon_fence_driver_check_lockup to take the exclusive_lock, and to kill it
during lockup recovery instead.

V2 used delayed work that ran during lockup recovery, but it required the read
lock. I've fixed this by downgrading the write lock and retrying if recovery
fails.

The current v3 uses queue_delayed_work instead of mod_delayed_work because of
a livelock with TTM delayed destroy. It also only enables the delayed work
when KMS IRQs are enabled, and because of that it looked better to move the
sw_irq_get/put calls in radeon_fence_wait_seq a little so they are only
called once.
---
 drivers/gpu/drm/radeon/radeon.h       |   2 +
 drivers/gpu/drm/radeon/radeon_fence.c | 162 ++++++++++++++++++++--------------
 2 files changed, 100 insertions(+), 64 deletions(-)
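
As an aside on the queue_delayed_work vs. mod_delayed_work point above, the
sketch below is illustrative only and not part of the patch; my_lockup_check,
my_schedule_check and MY_CHECK_TIMEOUT are made-up stand-ins, the point is the
workqueue API semantics that avoid the livelock:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

#define MY_CHECK_TIMEOUT (HZ / 2)	/* stand-in for RADEON_FENCE_JIFFIES_TIMEOUT */

static void my_lockup_check(struct work_struct *work)
{
	/* inspect fence progress here, reschedule if fences are still pending */
}

static DECLARE_DELAYED_WORK(my_check_work, my_lockup_check);

static void my_schedule_check(void)
{
	/*
	 * queue_delayed_work() is a no-op while the work is still pending,
	 * so the deadline set by the first caller survives and the check
	 * runs at most MY_CHECK_TIMEOUT jiffies later, no matter how many
	 * callers show up in between.
	 */
	queue_delayed_work(system_power_efficient_wq,
			   &my_check_work, MY_CHECK_TIMEOUT);

	/*
	 * mod_delayed_work() would re-arm the timer on every call instead;
	 * with a steady stream of callers (e.g. TTM delayed destroy keeping
	 * fence activity going) the handler might never run, which is the
	 * livelock mentioned above.
	 */
}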