@@ -1188,7 +1188,6 @@ static void drm_sched_run_job_work(struct work_struct *w)
container_of(w, struct drm_gpu_scheduler, work_run_job);
struct drm_sched_entity *entity;
struct dma_fence *fence;
- struct drm_sched_fence *s_fence;
struct drm_sched_job *sched_job;
int r;
@@ -1207,15 +1206,12 @@ static void drm_sched_run_job_work(struct work_struct *w)
return;
}
- s_fence = sched_job->s_fence;
-
atomic_add(sched_job->credits, &sched->credit_count);
drm_sched_job_begin(sched_job);
trace_drm_run_job(sched_job, entity);
fence = sched->ops->run_job(sched_job);
- complete_all(&entity->entity_idle);
- drm_sched_fence_scheduled(s_fence, fence);
+ drm_sched_fence_scheduled(sched_job->s_fence, fence);
if (!IS_ERR_OR_NULL(fence)) {
/* Drop for original kref_init of the fence */
@@ -1232,6 +1228,7 @@ static void drm_sched_run_job_work(struct work_struct *w)
PTR_ERR(fence) : 0);
}
+ complete_all(&entity->entity_idle);
wake_up(&sched->job_scheduled);
drm_sched_run_job_queue(sched);
}
After commit f7fe64ad0f22 ("drm/sched: Split free_job into own work
item"), and with drivers which use an unordered workqueue, sched_jobs
can be freed in parallel as soon as complete_all(&entity->entity_idle)
is called. This makes all dereferencing in the lower part of the worker
unsafe, so let's fix it by moving the complete_all() call to after the
worker is done touching the job.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Fixes: f7fe64ad0f22 ("drm/sched: Split free_job into own work item")
Cc: Christian König <christian.koenig@amd.com>
Cc: Danilo Krummrich <dakr@redhat.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Philipp Stanner <pstanner@redhat.com>
Cc: <stable@vger.kernel.org> # v6.8+
---
 drivers/gpu/drm/scheduler/sched_main.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)
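
For anyone who wants to see the ordering rule outside the kernel, below
is a minimal userspace analogue of the race, built with pthreads. It is
a sketch, not the scheduler code: struct job, free_job_worker(),
idle_signalled and payload are made-up stand-ins for sched_job, the
free_job work item, entity->entity_idle and sched_job->s_fence
respectively. Build with "cc -pthread".

/*
 * Minimal userspace analogue of the race, for illustration only (the
 * kernel uses struct completion and a free_job work item, not
 * pthreads). The "freer" thread waits for the idle signal and then
 * frees the job, so any access to the job after signalling idle is a
 * potential use-after-free. The fix is the same as in the patch:
 * make signalling idle the last access to the job.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct job {
	pthread_mutex_t lock;
	pthread_cond_t idle;
	int idle_signalled;
	int payload;		/* stands in for sched_job->s_fence etc. */
};

static void *free_job_worker(void *arg)
{
	struct job *job = arg;

	/* Wait until the run-job side signals idle... */
	pthread_mutex_lock(&job->lock);
	while (!job->idle_signalled)
		pthread_cond_wait(&job->idle, &job->lock);
	pthread_mutex_unlock(&job->lock);

	/* ...then tear the job down, like the free_job work item. */
	pthread_mutex_destroy(&job->lock);
	pthread_cond_destroy(&job->idle);
	free(job);
	return NULL;
}

int main(void)
{
	struct job *job = calloc(1, sizeof(*job));
	pthread_t freer;

	pthread_mutex_init(&job->lock, NULL);
	pthread_cond_init(&job->idle, NULL);
	job->payload = 42;
	pthread_create(&freer, NULL, free_job_worker, job);

	/* Do every access to the job first... */
	printf("payload: %d\n", job->payload);

	/*
	 * ...and signal idle only once we are done touching it.
	 * Signalling before the access above would open the same
	 * window the patch closes: the freer could free the job
	 * while we still dereference it.
	 */
	pthread_mutex_lock(&job->lock);
	job->idle_signalled = 1;
	pthread_cond_signal(&job->idle);
	pthread_mutex_unlock(&job->lock);

	pthread_join(freer, NULL);
	return 0;
}

The general rule is the same in both settings: a completion (or any
wake-up) that can allow another context to free an object must be the
last access the signalling side makes to that object.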