@@ -450,7 +450,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
num_hw_submission, amdgpu_job_hang_limit,
- timeout, ring->name);
+ timeout, ring->name, false);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
ring->name);
@@ -178,7 +178,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
- msecs_to_jiffies(500), dev_name(gpu->dev));
+ msecs_to_jiffies(500), dev_name(gpu->dev), true);
if (ret)
return ret;
@@ -130,7 +130,14 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
int i;
for (i = 0; i < entity->num_rq_list; ++i) {
- num_jobs = atomic_read(&entity->rq_list[i]->sched->num_jobs);
+ struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
+
+ if (!entity->rq_list[i]->sched->ready) {
+ DRM_WARN("sched%s is not ready, skipping", sched->name);
+ continue;
+ }
+
+ num_jobs = atomic_read(&sched->num_jobs);
if (num_jobs < min_jobs) {
min_jobs = num_jobs;
rq = entity->rq_list[i];
@@ -420,6 +420,9 @@ int drm_sched_job_init(struct drm_sched_job *job,
struct drm_gpu_scheduler *sched;
drm_sched_entity_select_rq(entity);
+ if (!entity->rq)
+ return -ENOENT;
+
sched = entity->rq->sched;
job->sched = sched;
@@ -598,6 +601,7 @@ static int drm_sched_main(void *param)
* @hang_limit: number of times to allow a job to hang before dropping it
* @timeout: timeout value in jiffies for the scheduler
* @name: name used for debugging
+ * @ready: marks if the underlying HW is ready to work
*
* Return 0 on success, otherwise error code.
*/
@@ -606,7 +610,8 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
unsigned hw_submission,
unsigned hang_limit,
long timeout,
- const char *name)
+ const char *name,
+ bool ready)
{
int i;
sched->ops = ops;
@@ -633,6 +638,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
return PTR_ERR(sched->thread);
}
+ sched->ready = ready;
return 0;
}
EXPORT_SYMBOL(drm_sched_init);
@@ -648,5 +654,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
if (sched->thread)
kthread_stop(sched->thread);
+
+ sched->ready = false;
}
EXPORT_SYMBOL(drm_sched_fini);
@@ -212,7 +212,7 @@ v3d_sched_init(struct v3d_dev *v3d)
&v3d_sched_ops,
hw_jobs_limit, job_hang_limit,
msecs_to_jiffies(hang_limit_ms),
- "v3d_bin");
+ "v3d_bin", true);
if (ret) {
dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret);
return ret;
@@ -222,7 +222,7 @@ v3d_sched_init(struct v3d_dev *v3d)
&v3d_sched_ops,
hw_jobs_limit, job_hang_limit,
msecs_to_jiffies(hang_limit_ms),
- "v3d_render");
+ "v3d_render", true);
if (ret) {
dev_err(v3d->dev, "Failed to create render scheduler: %d.",
ret);
@@ -264,6 +264,7 @@ struct drm_sched_backend_ops {
* @hang_limit: once the hangs by a job crosses this limit then it is marked
* guilty and it will be considered for scheduling further.
* @num_jobs: the number of jobs in queue in the scheduler
+ * @ready: marks if the underlying HW is ready to work
*
* One scheduler is implemented for each hardware ring.
*/
@@ -283,12 +284,14 @@ struct drm_gpu_scheduler {
spinlock_t job_list_lock;
int hang_limit;
atomic_t num_jobs;
+ bool ready;
};
int drm_sched_init(struct drm_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
uint32_t hw_submission, unsigned hang_limit, long timeout,
- const char *name);
+ const char *name,
+ bool ready);
void drm_sched_fini(struct drm_gpu_scheduler *sched);
int drm_sched_job_init(struct drm_sched_job *job,
struct drm_sched_entity *entity,
Problem: A particular scheduler may become unusable (because of its underlying HW) after some event (e.g. GPU reset). If it is later chosen by the get-free-sched policy, a command will fail to be submitted. Fix: Add a driver specific callback to report the sched status so that an rq with a bad sched can be avoided in favor of a working one, or of none at all, in which case job init will fail. v2: Switch from driver callback to flag in scheduler. Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 +- drivers/gpu/drm/scheduler/sched_entity.c | 9 ++++++++- drivers/gpu/drm/scheduler/sched_main.c | 10 +++++++++- drivers/gpu/drm/v3d/v3d_sched.c | 4 ++-- include/drm/gpu_scheduler.h | 5 ++++- 6 files changed, 25 insertions(+), 7 deletions(-)