Message ID | 1544118074-24910-2-git-send-email-andrey.grodzovsky@amd.com (mailing list archive)
---|---
State | New, archived
Series | [1/2] drm/sched: Refactor ring mirror list handling.
On 12/06/2018 12:41 PM, Andrey Grodzovsky wrote:
> Expedite job deletion from the ring mirror list to the HW fence signal
> callback instead of from finish_work; together with waiting for all such
> fences to signal in drm_sched_stop, this guarantees that an already
> signaled job will not be processed twice.
> Remove the sched finish fence callback and just submit finish_work
> directly from the HW fence callback.
>
> Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>  drivers/gpu/drm/scheduler/sched_fence.c |  4 +++-
>  drivers/gpu/drm/scheduler/sched_main.c  | 39 ++++++++++++++++-----------------
>  include/drm/gpu_scheduler.h             | 10 +++++++--
>  3 files changed, 30 insertions(+), 23 deletions(-)
>
> [...]
>
> @@ -418,13 +404,17 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
>  {
>  	struct drm_sched_job *s_job, *tmp;
>  	bool found_guilty = false;
> -	unsigned long flags;
>  	int r;
>
>  	if (unpark_only)
>  		goto unpark;
>
> -	spin_lock_irqsave(&sched->job_list_lock, flags);
> +	/*
> +	 * Locking the list is not required here as the sched thread is parked
> +	 * so no new jobs are being pushed in to HW and in drm_sched_stop we
> +	 * flushed any in flight jobs who didn't signal yet.

The comment is inaccurate here - it is supposed to say "any in flight jobs
that already have their sched finished fence signaled, and they are removed
from the mirror ring list at that point already anyway". I will fix this
text later together with other comments received on the patches.

Andrey

> Also concurrent
> +	 * GPU recovers can't run in parallel.
> +	 */
>
> [...]
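Based on Andrey's note above, a possible rewording of that comment in drm_sched_start() could look like the following. This is illustrative only, not a posted revision of the patch:

	/*
	 * Locking the list is not required here as the sched thread is parked,
	 * so no new jobs are being pushed into HW, and in drm_sched_stop we
	 * flushed any in flight jobs that already had their scheduler finished
	 * fence signaled - those were removed from the mirror list at that
	 * point anyway. Also concurrent GPU recovers can't run in parallel.
	 */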
> -----Original Message-----
> From: dri-devel <dri-devel-bounces@lists.freedesktop.org> On Behalf Of Andrey Grodzovsky
> Sent: Friday, December 07, 2018 1:41 AM
> To: dri-devel@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; ckoenig.leichtzumerken@gmail.com; eric@anholt.net; etnaviv@lists.freedesktop.org
> Cc: Liu, Monk <Monk.Liu@amd.com>
> Subject: [PATCH 2/2] drm/sched: Rework HW fence processing.
>
> Expedite job deletion from the ring mirror list to the HW fence signal
> callback instead of from finish_work; together with waiting for all such
> fences to signal in drm_sched_stop, this guarantees that an already
> signaled job will not be processed twice.
> Remove the sched finish fence callback and just submit finish_work
> directly from the HW fence callback.
>
> Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>
> [...]
>
> @@ -593,15 +582,25 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
>  	struct drm_sched_fence *s_fence =
>  		container_of(cb, struct drm_sched_fence, cb);
>  	struct drm_gpu_scheduler *sched = s_fence->sched;
> +	struct drm_sched_job *s_job = s_fence->s_job;

Seems we are back to the original; Christian argued that s_fence and s_job
have different lifetimes. Do their lifetimes become the same now?

-David

> [...]
On 07.12.18 at 04:18, Zhou, David (ChunMing) wrote:
>> -----Original Message-----
>> From: dri-devel <dri-devel-bounces@lists.freedesktop.org> On Behalf Of Andrey Grodzovsky
>> Subject: [PATCH 2/2] drm/sched: Rework HW fence processing.
>>
>> [...]
>>
>> @@ -593,15 +582,25 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
>>  	struct drm_sched_fence *s_fence =
>>  		container_of(cb, struct drm_sched_fence, cb);
>>  	struct drm_gpu_scheduler *sched = s_fence->sched;
>> +	struct drm_sched_job *s_job = s_fence->s_job;
>
> Seems we are back to the original; Christian argued that s_fence and s_job
> have different lifetimes. Do their lifetimes become the same now?

No, that still doesn't work. The scheduler fence lives much longer than the
job, so we would have a dangling pointer here.

The real question is why we actually need that? I mean we just need to move
the callback structure into the job, don't we?

Christian.
On 12/07/2018 03:19 AM, Christian König wrote:
> On 07.12.18 at 04:18, Zhou, David (ChunMing) wrote:
>>> [...]
>>>
>>> @@ -593,15 +582,25 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
>>>  	struct drm_sched_fence *s_fence =
>>>  		container_of(cb, struct drm_sched_fence, cb);
>>>  	struct drm_gpu_scheduler *sched = s_fence->sched;
>>> +	struct drm_sched_job *s_job = s_fence->s_job;
>>
>> Seems we are back to the original; Christian argued that s_fence and s_job
>> have different lifetimes. Do their lifetimes become the same now?
>
> No, that still doesn't work. The scheduler fence lives much longer than the
> job, so we would have a dangling pointer here.
>
> The real question is why we actually need that? I mean we just need to move
> the callback structure into the job, don't we?
>
> Christian.

So let's say I move the .cb from drm_sched_fence to drm_sched_job, and there
it's OK to reference job->s_fence because s_fence lives at least as long as
the job, right?

Andrey
On 07.12.18 at 16:22, Grodzovsky, Andrey wrote:
> On 12/07/2018 03:19 AM, Christian König wrote:
>> [...]
>>
>> No, that still doesn't work. The scheduler fence lives much longer than the
>> job, so we would have a dangling pointer here.
>>
>> The real question is why we actually need that? I mean we just need to move
>> the callback structure into the job, don't we?
>>
>> Christian.
>
> So let's say I move the .cb from drm_sched_fence to drm_sched_job, and there
> it's OK to reference job->s_fence because s_fence lives at least as long as
> the job, right?
>
> Andrey

Exactly. Well, we could actually make the lifetime of s_fence a bit shorter
and drop it as soon as it is signaled.

Christian.
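For context, a minimal sketch of what Christian's suggestion could look like inside drivers/gpu/drm/scheduler/sched_main.c: embed the HW fence callback in drm_sched_job and recover the job with container_of() instead of going through a fence->s_job back pointer. This is illustrative only and not part of the posted patch; the added "struct dma_fence_cb cb;" member in drm_sched_job is an assumption.

	/*
	 * Sketch only (assumes drm_sched_job gains a "struct dma_fence_cb cb;"
	 * member): the HW fence callback recovers the job directly from the
	 * embedded callback, so drm_sched_fence needs no s_job back pointer.
	 */
	static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
	{
		struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
		/* The job is still alive here; it is only freed from finish_work,
		 * which is scheduled at the end of this callback. */
		struct drm_sched_fence *s_fence = s_job->s_fence;
		struct drm_gpu_scheduler *sched = s_fence->sched;
		unsigned long flags;

		cancel_delayed_work(&sched->work_tdr);

		atomic_dec(&sched->hw_rq_count);
		atomic_dec(&sched->num_jobs);

		spin_lock_irqsave(&sched->job_list_lock, flags);
		/* remove job from ring_mirror_list */
		list_del_init(&s_job->node);
		spin_unlock_irqrestore(&sched->job_list_lock, flags);

		drm_sched_fence_finished(s_fence);

		trace_drm_sched_process_job(s_fence);
		wake_up_interruptible(&sched->wake_up_worker);

		schedule_work(&s_job->finish_work);
	}

	/* The submit path would then register the job's own callback on the
	 * HW fence, e.g.:
	 * dma_fence_add_callback(fence, &s_job->cb, drm_sched_process_job); */

With such a layout the fence->s_job back pointer and the extra drm_sched_fence_create() parameter introduced by this patch would presumably no longer be needed, which is what addresses the lifetime concern raised above.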
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
index d8d2dff..e62c239 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -151,7 +151,8 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
 EXPORT_SYMBOL(to_drm_sched_fence);
 
 struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
-					       void *owner)
+					       void *owner,
+					       struct drm_sched_job *s_job)
 {
 	struct drm_sched_fence *fence = NULL;
 	unsigned seq;
@@ -163,6 +164,7 @@ struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
 	fence->owner = owner;
 	fence->sched = entity->rq->sched;
 	spin_lock_init(&fence->lock);
+	fence->s_job = s_job;
 
 	seq = atomic_inc_return(&entity->fence_seq);
 	dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled,
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 8fb7f86..2860037 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -284,31 +284,17 @@ static void drm_sched_job_finish(struct work_struct *work)
 	cancel_delayed_work_sync(&sched->work_tdr);
 
 	spin_lock_irqsave(&sched->job_list_lock, flags);
-	/* remove job from ring_mirror_list */
-	list_del_init(&s_job->node);
-	/* queue TDR for next job */
 	drm_sched_start_timeout(sched);
 	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
 	sched->ops->free_job(s_job);
 }
 
-static void drm_sched_job_finish_cb(struct dma_fence *f,
-				    struct dma_fence_cb *cb)
-{
-	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
-						 finish_cb);
-	schedule_work(&job->finish_work);
-}
-
 static void drm_sched_job_begin(struct drm_sched_job *s_job)
 {
 	struct drm_gpu_scheduler *sched = s_job->sched;
 	unsigned long flags;
 
-	dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
-			       drm_sched_job_finish_cb);
-
 	spin_lock_irqsave(&sched->job_list_lock, flags);
 	list_add_tail(&s_job->node, &sched->ring_mirror_list);
 	drm_sched_start_timeout(sched);
@@ -418,13 +404,17 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
 {
 	struct drm_sched_job *s_job, *tmp;
 	bool found_guilty = false;
-	unsigned long flags;
 	int r;
 
 	if (unpark_only)
 		goto unpark;
 
-	spin_lock_irqsave(&sched->job_list_lock, flags);
+	/*
+	 * Locking the list is not required here as the sched thread is parked
+	 * so no new jobs are being pushed in to HW and in drm_sched_stop we
+	 * flushed any in flight jobs who didn't signal yet. Also concurrent
+	 * GPU recovers can't run in parallel.
+	 */
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct drm_sched_fence *s_fence = s_job->s_fence;
 		struct dma_fence *fence;
@@ -453,7 +443,6 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
 	}
 
 	drm_sched_start_timeout(sched);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
 unpark:
 	kthread_unpark(sched->thread);
@@ -505,7 +494,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
 	job->sched = sched;
 	job->entity = entity;
 	job->s_priority = entity->rq - sched->sched_rq;
-	job->s_fence = drm_sched_fence_create(entity, owner);
+	job->s_fence = drm_sched_fence_create(entity, owner, job);
 	if (!job->s_fence)
 		return -ENOMEM;
 	job->id = atomic64_inc_return(&sched->job_id_count);
@@ -593,15 +582,25 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 	struct drm_sched_fence *s_fence =
 		container_of(cb, struct drm_sched_fence, cb);
 	struct drm_gpu_scheduler *sched = s_fence->sched;
+	struct drm_sched_job *s_job = s_fence->s_job;
+	unsigned long flags;
+
+	cancel_delayed_work(&sched->work_tdr);
 
-	dma_fence_get(&s_fence->finished);
 	atomic_dec(&sched->hw_rq_count);
 	atomic_dec(&sched->num_jobs);
+
+	spin_lock_irqsave(&sched->job_list_lock, flags);
+	/* remove job from ring_mirror_list */
+	list_del_init(&s_job->node);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
 	drm_sched_fence_finished(s_fence);
 
 	trace_drm_sched_process_job(s_fence);
-	dma_fence_put(&s_fence->finished);
 	wake_up_interruptible(&sched->wake_up_worker);
+
+	schedule_work(&s_job->finish_work);
 }
 
 /**
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index c94b592..23855c6 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -115,6 +115,8 @@ struct drm_sched_rq {
 	struct drm_sched_entity *current_entity;
 };
 
+struct drm_sched_job;
+
 /**
  * struct drm_sched_fence - fences corresponding to the scheduling of a job.
  */
@@ -160,6 +162,9 @@ struct drm_sched_fence {
 	 * @owner: job owner for debugging
 	 */
 	void *owner;
+
+	/* Back pointer to owning job */
+	struct drm_sched_job *s_job;
 };
 
 struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
@@ -330,8 +335,9 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
 				   enum drm_sched_priority priority);
 bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
 
-struct drm_sched_fence *drm_sched_fence_create(
-	struct drm_sched_entity *s_entity, void *owner);
+struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *s_entity,
+					       void *owner,
+					       struct drm_sched_job *s_job);
 void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
 void drm_sched_fence_finished(struct drm_sched_fence *fence);
Expedite job deletion from the ring mirror list to the HW fence signal
callback instead of from finish_work; together with waiting for all such
fences to signal in drm_sched_stop, this guarantees that an already signaled
job will not be processed twice.
Remove the sched finish fence callback and just submit finish_work directly
from the HW fence callback.

Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/scheduler/sched_fence.c |  4 +++-
 drivers/gpu/drm/scheduler/sched_main.c  | 39 ++++++++++++++++-----------------
 include/drm/gpu_scheduler.h             | 10 +++++++--
 3 files changed, 30 insertions(+), 23 deletions(-)