@@ -151,7 +151,8 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
EXPORT_SYMBOL(to_drm_sched_fence);
struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
- void *owner)
+ void *owner,
+ struct drm_sched_job *s_job)
{
struct drm_sched_fence *fence = NULL;
unsigned seq;
@@ -163,6 +164,7 @@ struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
fence->owner = owner;
fence->sched = entity->rq->sched;
spin_lock_init(&fence->lock);
+ fence->s_job = s_job;
seq = atomic_inc_return(&entity->fence_seq);
dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled,
@@ -284,8 +284,6 @@ static void drm_sched_job_finish(struct work_struct *work)
cancel_delayed_work_sync(&sched->work_tdr);
spin_lock_irqsave(&sched->job_list_lock, flags);
- /* remove job from ring_mirror_list */
- list_del_init(&s_job->node);
/* queue TDR for next job */
drm_sched_start_timeout(sched);
spin_unlock_irqrestore(&sched->job_list_lock, flags);
@@ -293,22 +291,11 @@ static void drm_sched_job_finish(struct work_struct *work)
sched->ops->free_job(s_job);
}
-static void drm_sched_job_finish_cb(struct dma_fence *f,
- struct dma_fence_cb *cb)
-{
- struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
- finish_cb);
- schedule_work(&job->finish_work);
-}
-
static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
struct drm_gpu_scheduler *sched = s_job->sched;
unsigned long flags;
- dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
- drm_sched_job_finish_cb);
-
spin_lock_irqsave(&sched->job_list_lock, flags);
list_add_tail(&s_job->node, &sched->ring_mirror_list);
drm_sched_start_timeout(sched);
@@ -363,8 +350,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad,
dma_fence_put(s_job->s_fence->parent);
s_job->s_fence->parent = NULL;
atomic_dec(&sched->hw_rq_count);
- }
- else {
+ } else {
/* TODO Is it get/put neccessey here ? */
dma_fence_get(&s_job->s_fence->finished);
list_add(&s_job->finish_node, &wait_list);
@@ -423,7 +409,13 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
if (unpark_only)
goto unpark;
- spin_lock_irqsave(&sched->job_list_lock, flags);
+ /*
+ * Locking the list is not required here as the sched thread is parked
+ * so no new jobs are being pushed in to HW and in drm_sched_stop we
+ * flushed all the jobs who were still in mirror list but who already
+ * signaled and removed them self from the list. Also concurrent
+ * GPU recovers can't run in parallel.
+ */
list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
struct drm_sched_fence *s_fence = s_job->s_fence;
struct dma_fence *fence = s_job->s_fence->parent;
@@ -441,7 +433,6 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
}
drm_sched_start_timeout(sched);
- spin_unlock_irqrestore(&sched->job_list_lock, flags);
unpark:
kthread_unpark(sched->thread);
@@ -505,7 +496,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
job->sched = sched;
job->entity = entity;
job->s_priority = entity->rq - sched->sched_rq;
- job->s_fence = drm_sched_fence_create(entity, owner);
+ job->s_fence = drm_sched_fence_create(entity, owner, job);
if (!job->s_fence)
return -ENOMEM;
job->id = atomic64_inc_return(&sched->job_id_count);
@@ -593,15 +584,25 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
struct drm_sched_fence *s_fence =
container_of(cb, struct drm_sched_fence, cb);
struct drm_gpu_scheduler *sched = s_fence->sched;
+ struct drm_sched_job *s_job = s_fence->s_job;
+ unsigned long flags;
+
+ cancel_delayed_work(&sched->work_tdr);
- dma_fence_get(&s_fence->finished);
atomic_dec(&sched->hw_rq_count);
atomic_dec(&sched->num_jobs);
+
+ spin_lock_irqsave(&sched->job_list_lock, flags);
+ /* remove job from ring_mirror_list */
+ list_del_init(&s_job->node);
+ spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
drm_sched_fence_finished(s_fence);
trace_drm_sched_process_job(s_fence);
- dma_fence_put(&s_fence->finished);
wake_up_interruptible(&sched->wake_up_worker);
+
+ schedule_work(&s_job->finish_work);
}
/**
@@ -115,6 +115,8 @@ struct drm_sched_rq {
struct drm_sched_entity *current_entity;
};
+struct drm_sched_job;
+
/**
* struct drm_sched_fence - fences corresponding to the scheduling of a job.
*/
@@ -160,6 +162,9 @@ struct drm_sched_fence {
* @owner: job owner for debugging
*/
void *owner;
+
+ /* Back pointer to owning job */
+ struct drm_sched_job *s_job;
};
struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
@@ -330,8 +335,9 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
enum drm_sched_priority priority);
bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
-struct drm_sched_fence *drm_sched_fence_create(
- struct drm_sched_entity *s_entity, void *owner);
+struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *s_entity,
+ void *owner,
+ struct drm_sched_job *s_job);
void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
void drm_sched_fence_finished(struct drm_sched_fence *fence);
Expedite job deletion from ring mirror list to the HW fence signal callback instead from finish_work, together with waiting for all such fences to signal in drm_sched_stop we garantee that already signaled job will not be processed twice. Remove the sched finish fence callback and just submit finish_work directly from the HW fence callback. v2: Fix comments. Suggested-by: Christian Koenig <Christian.Koenig@amd.com> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> --- drivers/gpu/drm/scheduler/sched_fence.c | 4 +++- drivers/gpu/drm/scheduler/sched_main.c | 41 +++++++++++++++++---------------- include/drm/gpu_scheduler.h | 10 ++++++-- 3 files changed, 32 insertions(+), 23 deletions(-)