Message ID | 20201125031708.6433-2-luben.tuikov@amd.com (mailing list archive) |
---|---|
State | New, archived |
Series | Allow to extend the timeout without jobs disappearing |
Am 25.11.20 um 04:17 schrieb Luben Tuikov:
> Rename "node" to "list" in struct drm_sched_job,
> in order to make it consistent with what we see
> being used throughout gpu_scheduler.h, for
> instance in struct drm_sched_entity, as well as
> the rest of DRM and the kernel.
>
> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  6 +++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  2 +-
>  drivers/gpu/drm/scheduler/sched_main.c      | 23 +++++++++++----------
>  include/drm/gpu_scheduler.h                 |  4 ++--
>  5 files changed, 19 insertions(+), 18 deletions(-)
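For readers outside the kernel tree, the reason a one-word rename fans out to so many call sites is that the Linux list macros take the name of the embedded struct list_head member and use container_of() to recover the surrounding struct drm_sched_job. The sketch below is a minimal, self-contained userspace illustration of that mechanism, not the kernel's <linux/list.h>; it relies on the GCC/Clang __typeof__ extension, and the demo_job type is purely hypothetical. It only shows why every list_for_each_entry(..., node) had to become list_for_each_entry(..., list). The full patch follows below.

```c
/*
 * Minimal userspace sketch (NOT the kernel's <linux/list.h>): the iterator
 * macro bakes the member name into container_of(), so renaming the member
 * from "node" to "list" has to touch every iterator call site.
 */
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Simplified iterator; the kernel's version is equivalent in spirit. */
#define list_for_each_entry(pos, head, member)                               \
	for (pos = container_of((head)->next, __typeof__(*pos), member);     \
	     &pos->member != (head);                                          \
	     pos = container_of(pos->member.next, __typeof__(*pos), member))

struct demo_job {
	int id;
	struct list_head list;	/* was "node" before this patch */
};

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

int main(void)
{
	struct list_head mirror = { &mirror, &mirror };	/* empty list head */
	struct demo_job a = { .id = 1 }, b = { .id = 2 };
	struct demo_job *s_job;

	list_add_tail(&a.list, &mirror);
	list_add_tail(&b.list, &mirror);

	/* The third argument names the embedded member, hence the churn. */
	list_for_each_entry(s_job, &mirror, list)
		printf("job %d on the mirror list\n", s_job->id);

	return 0;
}
```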
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 5c1f3725c741..8358cae0b5a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1427,7 +1427,7 @@ static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched)
 	struct dma_fence *fence;
 
 	spin_lock(&sched->job_list_lock);
-	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
+	list_for_each_entry(s_job, &sched->ring_mirror_list, list) {
 		fence = sched->ops->run_job(s_job);
 		dma_fence_put(fence);
 	}
@@ -1459,10 +1459,10 @@ static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
 
 no_preempt:
 	spin_lock(&sched->job_list_lock);
-	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
+	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, list) {
 		if (dma_fence_is_signaled(&s_job->s_fence->finished)) {
 			/* remove job from ring_mirror_list */
-			list_del_init(&s_job->node);
+			list_del_init(&s_job->list);
 			sched->ops->free_job(s_job);
 			continue;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7560b05e4ac1..4df6de81cd41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4128,7 +4128,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
 
 		spin_lock(&ring->sched.job_list_lock);
 		job = list_first_entry_or_null(&ring->sched.ring_mirror_list,
-				struct drm_sched_job, node);
+				struct drm_sched_job, list);
 		spin_unlock(&ring->sched.job_list_lock);
 		if (job)
 			return true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index dcfe8a3b03ff..aca52a46b93d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -271,7 +271,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
 	}
 
 	/* Signal all jobs already scheduled to HW */
-	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
+	list_for_each_entry(s_job, &sched->ring_mirror_list, list) {
 		struct drm_sched_fence *s_fence = s_job->s_fence;
 
 		dma_fence_set_error(&s_fence->finished, -EHWPOISON);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index c6332d75025e..c52eba407ebd 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -272,7 +272,7 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
 	struct drm_gpu_scheduler *sched = s_job->sched;
 
 	spin_lock(&sched->job_list_lock);
-	list_add_tail(&s_job->node, &sched->ring_mirror_list);
+	list_add_tail(&s_job->list, &sched->ring_mirror_list);
 	drm_sched_start_timeout(sched);
 	spin_unlock(&sched->job_list_lock);
 }
@@ -287,7 +287,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
 	/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
 	spin_lock(&sched->job_list_lock);
 	job = list_first_entry_or_null(&sched->ring_mirror_list,
-				       struct drm_sched_job, node);
+				       struct drm_sched_job, list);
 
 	if (job) {
 		/*
@@ -295,7 +295,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
 		 * drm_sched_cleanup_jobs. It will be reinserted back after sched->thread
 		 * is parked at which point it's safe.
 		 */
-		list_del_init(&job->node);
+		list_del_init(&job->list);
 		spin_unlock(&sched->job_list_lock);
 
 		job->sched->ops->timedout_job(job);
@@ -392,7 +392,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 		 * Add at the head of the queue to reflect it was the earliest
 		 * job extracted.
 		 */
-		list_add(&bad->node, &sched->ring_mirror_list);
+		list_add(&bad->list, &sched->ring_mirror_list);
 
 	/*
 	 * Iterate the job list from later to earlier one and either deactive
@@ -400,7 +400,8 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 	 * signaled.
 	 * This iteration is thread safe as sched thread is stopped.
 	 */
-	list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
+	list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list,
+					 list) {
 		if (s_job->s_fence->parent &&
 		    dma_fence_remove_callback(s_job->s_fence->parent,
 					      &s_job->cb)) {
@@ -411,7 +412,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 			 * Locking here is for concurrent resume timeout
 			 */
 			spin_lock(&sched->job_list_lock);
-			list_del_init(&s_job->node);
+			list_del_init(&s_job->list);
 			spin_unlock(&sched->job_list_lock);
 
 			/*
@@ -462,7 +463,7 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 	 * so no new jobs are being inserted or removed. Also concurrent
 	 * GPU recovers can't run in parallel.
 	 */
-	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
+	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, list) {
 		struct dma_fence *fence = s_job->s_fence->parent;
 
 		atomic_inc(&sched->hw_rq_count);
@@ -505,7 +506,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
 	bool found_guilty = false;
 	struct dma_fence *fence;
 
-	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
+	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, list) {
 		struct drm_sched_fence *s_fence = s_job->s_fence;
 
 		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
@@ -565,7 +566,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
 		return -ENOMEM;
 	job->id = atomic64_inc_return(&sched->job_id_count);
 
-	INIT_LIST_HEAD(&job->node);
+	INIT_LIST_HEAD(&job->list);
 
 	return 0;
 }
@@ -684,11 +685,11 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 	spin_lock(&sched->job_list_lock);
 
 	job = list_first_entry_or_null(&sched->ring_mirror_list,
-				       struct drm_sched_job, node);
+				       struct drm_sched_job, list);
 
 	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
 		/* remove job from ring_mirror_list */
-		list_del_init(&job->node);
+		list_del_init(&job->list);
 	} else {
 		job = NULL;
 		/* queue timeout for next job */
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 92436553fd6a..3add0072bd37 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -189,14 +189,14 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
  */
 struct drm_sched_job {
 	struct spsc_node		queue_node;
+	struct list_head		list;
 	struct drm_gpu_scheduler	*sched;
 	struct drm_sched_fence		*s_fence;
 	struct dma_fence_cb		finish_cb;
-	struct list_head		node;
 	uint64_t			id;
 	atomic_t			karma;
 	enum drm_sched_priority		s_priority;
-	struct drm_sched_entity  *entity;
+	struct drm_sched_entity         *entity;
 	struct dma_fence_cb		cb;
 };
 
Rename "node" to "list" in struct drm_sched_job, in order to make it consistent with what we see being used throughout gpu_scheduler.h, for instance in struct drm_sched_entity, as well as the rest of DRM and the kernel. Signed-off-by: Luben Tuikov <luben.tuikov@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- drivers/gpu/drm/scheduler/sched_main.c | 23 +++++++++++---------- include/drm/gpu_scheduler.h | 4 ++-- 5 files changed, 19 insertions(+), 18 deletions(-)