[2/2] drm/sched: Rework HW fence processing.

Message ID 1544118074-24910-2-git-send-email-andrey.grodzovsky@amd.com (mailing list archive)
State New, archived
Series [1/2] drm/sched: Refactor ring mirror list handling.

Commit Message

Andrey Grodzovsky Dec. 6, 2018, 5:41 p.m. UTC
Expedite job deletion from the ring mirror list to the HW fence signal
callback instead of from finish_work; together with waiting for all
such fences to signal in drm_sched_stop, this guarantees that an
already signaled job will not be processed twice.
Remove the sched finish fence callback and just submit finish_work
directly from the HW fence callback.

Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/scheduler/sched_fence.c |  4 +++-
 drivers/gpu/drm/scheduler/sched_main.c  | 39 ++++++++++++++++-----------------
 include/drm/gpu_scheduler.h             | 10 +++++++--
 3 files changed, 30 insertions(+), 23 deletions(-)
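
For orientation, a condensed sketch of the reworked HW fence callback as
posted in the diff below (trace call omitted): the job is removed from the
ring mirror list directly in the callback, and finish_work is scheduled
from there instead of via the scheduler finish fence callback.

static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_fence *s_fence =
		container_of(cb, struct drm_sched_fence, cb);
	struct drm_gpu_scheduler *sched = s_fence->sched;
	struct drm_sched_job *s_job = s_fence->s_job;	/* back pointer added by this patch */
	unsigned long flags;

	cancel_delayed_work(&sched->work_tdr);

	atomic_dec(&sched->hw_rq_count);
	atomic_dec(&sched->num_jobs);

	spin_lock_irqsave(&sched->job_list_lock, flags);
	list_del_init(&s_job->node);	/* expedited removal from ring_mirror_list */
	spin_unlock_irqrestore(&sched->job_list_lock, flags);

	drm_sched_fence_finished(s_fence);
	wake_up_interruptible(&sched->wake_up_worker);

	schedule_work(&s_job->finish_work);	/* free_job still runs from finish_work */
}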

Comments

Andrey Grodzovsky Dec. 6, 2018, 6:12 p.m. UTC | #1
On 12/06/2018 12:41 PM, Andrey Grodzovsky wrote:
> Expedite job deletion from ring mirror list to the HW fence signal
> callback instead from finish_work, together with waiting for all
> such fences to signal in drm_sched_stop we garantee that
> already signaled job will not be processed twice.
> Remove the sched finish fence callback and just submit finish_work
> directly from the HW fence callback.
>
> Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/scheduler/sched_fence.c |  4 +++-
>   drivers/gpu/drm/scheduler/sched_main.c  | 39 ++++++++++++++++-----------------
>   include/drm/gpu_scheduler.h             | 10 +++++++--
>   3 files changed, 30 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> index d8d2dff..e62c239 100644
> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> @@ -151,7 +151,8 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
>   EXPORT_SYMBOL(to_drm_sched_fence);
>   
>   struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
> -					       void *owner)
> +					       void *owner,
> +					       struct drm_sched_job *s_job)
>   {
>   	struct drm_sched_fence *fence = NULL;
>   	unsigned seq;
> @@ -163,6 +164,7 @@ struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
>   	fence->owner = owner;
>   	fence->sched = entity->rq->sched;
>   	spin_lock_init(&fence->lock);
> +	fence->s_job = s_job;
>   
>   	seq = atomic_inc_return(&entity->fence_seq);
>   	dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled,
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 8fb7f86..2860037 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -284,31 +284,17 @@ static void drm_sched_job_finish(struct work_struct *work)
>   	cancel_delayed_work_sync(&sched->work_tdr);
>   
>   	spin_lock_irqsave(&sched->job_list_lock, flags);
> -	/* remove job from ring_mirror_list */
> -	list_del_init(&s_job->node);
> -	/* queue TDR for next job */
>   	drm_sched_start_timeout(sched);
>   	spin_unlock_irqrestore(&sched->job_list_lock, flags);
>   
>   	sched->ops->free_job(s_job);
>   }
>   
> -static void drm_sched_job_finish_cb(struct dma_fence *f,
> -				    struct dma_fence_cb *cb)
> -{
> -	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
> -						 finish_cb);
> -	schedule_work(&job->finish_work);
> -}
> -
>   static void drm_sched_job_begin(struct drm_sched_job *s_job)
>   {
>   	struct drm_gpu_scheduler *sched = s_job->sched;
>   	unsigned long flags;
>   
> -	dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
> -			       drm_sched_job_finish_cb);
> -
>   	spin_lock_irqsave(&sched->job_list_lock, flags);
>   	list_add_tail(&s_job->node, &sched->ring_mirror_list);
>   	drm_sched_start_timeout(sched);
> @@ -418,13 +404,17 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
>   {
>   	struct drm_sched_job *s_job, *tmp;
>   	bool found_guilty = false;
> -	unsigned long flags;
>   	int r;
>   
>   	if (unpark_only)
>   		goto unpark;
>   
> -	spin_lock_irqsave(&sched->job_list_lock, flags);	
> +	/*
> +	 * Locking the list is not required here as the sched thread is parked
> +	 * so no new jobs are being pushed in to HW and in drm_sched_stop we
> +	 * flushed any in flight jobs who didn't signal yet.

The comment is inaccurate here - it should rather say 'any in-flight
jobs whose scheduler fence has already signaled; those have been removed
from the ring mirror list at that point anyway'.
I will fix this text later together with the other comments received on
the patches.

Andrey
> Also concurrent
> +	 * GPU recovers can't run in parallel.
> +	 */
>   	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
>   		struct drm_sched_fence *s_fence = s_job->s_fence;
>   		struct dma_fence *fence;
> @@ -453,7 +443,6 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
>   	}
>   
>   	drm_sched_start_timeout(sched);
> -	spin_unlock_irqrestore(&sched->job_list_lock, flags);
>   
>   unpark:
>   	kthread_unpark(sched->thread);
> @@ -505,7 +494,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
>   	job->sched = sched;
>   	job->entity = entity;
>   	job->s_priority = entity->rq - sched->sched_rq;
> -	job->s_fence = drm_sched_fence_create(entity, owner);
> +	job->s_fence = drm_sched_fence_create(entity, owner, job);
>   	if (!job->s_fence)
>   		return -ENOMEM;
>   	job->id = atomic64_inc_return(&sched->job_id_count);
> @@ -593,15 +582,25 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
>   	struct drm_sched_fence *s_fence =
>   		container_of(cb, struct drm_sched_fence, cb);
>   	struct drm_gpu_scheduler *sched = s_fence->sched;
> +	struct drm_sched_job *s_job = s_fence->s_job;
> +	unsigned long flags;
> +
> +	cancel_delayed_work(&sched->work_tdr);
>   
> -	dma_fence_get(&s_fence->finished);
>   	atomic_dec(&sched->hw_rq_count);
>   	atomic_dec(&sched->num_jobs);
> +
> +	spin_lock_irqsave(&sched->job_list_lock, flags);
> +	/* remove job from ring_mirror_list */
> +	list_del_init(&s_job->node);
> +	spin_unlock_irqrestore(&sched->job_list_lock, flags);
> +
>   	drm_sched_fence_finished(s_fence);
>   
>   	trace_drm_sched_process_job(s_fence);
> -	dma_fence_put(&s_fence->finished);
>   	wake_up_interruptible(&sched->wake_up_worker);
> +
> +	schedule_work(&s_job->finish_work);
>   }
>   
>   /**
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index c94b592..23855c6 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -115,6 +115,8 @@ struct drm_sched_rq {
>   	struct drm_sched_entity		*current_entity;
>   };
>   
> +struct drm_sched_job;
> +
>   /**
>    * struct drm_sched_fence - fences corresponding to the scheduling of a job.
>    */
> @@ -160,6 +162,9 @@ struct drm_sched_fence {
>            * @owner: job owner for debugging
>            */
>   	void				*owner;
> +
> +	/* Back pointer to owning job */
> +	struct drm_sched_job 		*s_job;
>   };
>   
>   struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
> @@ -330,8 +335,9 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
>   				   enum drm_sched_priority priority);
>   bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>   
> -struct drm_sched_fence *drm_sched_fence_create(
> -	struct drm_sched_entity *s_entity, void *owner);
> +struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *s_entity,
> +					       void *owner,
> +					       struct drm_sched_job *s_job);
>   void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
>   void drm_sched_fence_finished(struct drm_sched_fence *fence);
>
Chunming Zhou Dec. 7, 2018, 3:18 a.m. UTC | #2
> -----Original Message-----
> From: dri-devel <dri-devel-bounces@lists.freedesktop.org> On Behalf Of
> Andrey Grodzovsky
> Sent: Friday, December 07, 2018 1:41 AM
> To: dri-devel@lists.freedesktop.org; amd-gfx@lists.freedesktop.org;
> ckoenig.leichtzumerken@gmail.com; eric@anholt.net;
> etnaviv@lists.freedesktop.org
> Cc: Liu, Monk <Monk.Liu@amd.com>
> Subject: [PATCH 2/2] drm/sched: Rework HW fence processing.
> 
> Expedite job deletion from ring mirror list to the HW fence signal callback
> instead from finish_work, together with waiting for all such fences to signal in
> drm_sched_stop we garantee that already signaled job will not be processed
> twice.
> Remove the sched finish fence callback and just submit finish_work directly
> from the HW fence callback.
> 
> Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>  drivers/gpu/drm/scheduler/sched_fence.c |  4 +++-
> drivers/gpu/drm/scheduler/sched_main.c  | 39 ++++++++++++++++----------
> -------
>  include/drm/gpu_scheduler.h             | 10 +++++++--
>  3 files changed, 30 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c
> b/drivers/gpu/drm/scheduler/sched_fence.c
> index d8d2dff..e62c239 100644
> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> @@ -151,7 +151,8 @@ struct drm_sched_fence
> *to_drm_sched_fence(struct dma_fence *f)
> EXPORT_SYMBOL(to_drm_sched_fence);
> 
>  struct drm_sched_fence *drm_sched_fence_create(struct
> drm_sched_entity *entity,
> -					       void *owner)
> +					       void *owner,
> +					       struct drm_sched_job *s_job)
>  {
>  	struct drm_sched_fence *fence = NULL;
>  	unsigned seq;
> @@ -163,6 +164,7 @@ struct drm_sched_fence
> *drm_sched_fence_create(struct drm_sched_entity *entity,
>  	fence->owner = owner;
>  	fence->sched = entity->rq->sched;
>  	spin_lock_init(&fence->lock);
> +	fence->s_job = s_job;
> 
>  	seq = atomic_inc_return(&entity->fence_seq);
>  	dma_fence_init(&fence->scheduled,
> &drm_sched_fence_ops_scheduled, diff --git
> a/drivers/gpu/drm/scheduler/sched_main.c
> b/drivers/gpu/drm/scheduler/sched_main.c
> index 8fb7f86..2860037 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -284,31 +284,17 @@ static void drm_sched_job_finish(struct
> work_struct *work)
>  	cancel_delayed_work_sync(&sched->work_tdr);
> 
>  	spin_lock_irqsave(&sched->job_list_lock, flags);
> -	/* remove job from ring_mirror_list */
> -	list_del_init(&s_job->node);
> -	/* queue TDR for next job */
>  	drm_sched_start_timeout(sched);
>  	spin_unlock_irqrestore(&sched->job_list_lock, flags);
> 
>  	sched->ops->free_job(s_job);
>  }
> 
> -static void drm_sched_job_finish_cb(struct dma_fence *f,
> -				    struct dma_fence_cb *cb)
> -{
> -	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
> -						 finish_cb);
> -	schedule_work(&job->finish_work);
> -}
> -
>  static void drm_sched_job_begin(struct drm_sched_job *s_job)  {
>  	struct drm_gpu_scheduler *sched = s_job->sched;
>  	unsigned long flags;
> 
> -	dma_fence_add_callback(&s_job->s_fence->finished, &s_job-
> >finish_cb,
> -			       drm_sched_job_finish_cb);
> -
>  	spin_lock_irqsave(&sched->job_list_lock, flags);
>  	list_add_tail(&s_job->node, &sched->ring_mirror_list);
>  	drm_sched_start_timeout(sched);
> @@ -418,13 +404,17 @@ void drm_sched_start(struct drm_gpu_scheduler
> *sched, bool unpark_only)  {
>  	struct drm_sched_job *s_job, *tmp;
>  	bool found_guilty = false;
> -	unsigned long flags;
>  	int r;
> 
>  	if (unpark_only)
>  		goto unpark;
> 
> -	spin_lock_irqsave(&sched->job_list_lock, flags);
> +	/*
> +	 * Locking the list is not required here as the sched thread is parked
> +	 * so no new jobs are being pushed in to HW and in drm_sched_stop
> we
> +	 * flushed any in flight jobs who didn't signal yet. Also concurrent
> +	 * GPU recovers can't run in parallel.
> +	 */
>  	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list,
> node) {
>  		struct drm_sched_fence *s_fence = s_job->s_fence;
>  		struct dma_fence *fence;
> @@ -453,7 +443,6 @@ void drm_sched_start(struct drm_gpu_scheduler
> *sched, bool unpark_only)
>  	}
> 
>  	drm_sched_start_timeout(sched);
> -	spin_unlock_irqrestore(&sched->job_list_lock, flags);
> 
>  unpark:
>  	kthread_unpark(sched->thread);
> @@ -505,7 +494,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
>  	job->sched = sched;
>  	job->entity = entity;
>  	job->s_priority = entity->rq - sched->sched_rq;
> -	job->s_fence = drm_sched_fence_create(entity, owner);
> +	job->s_fence = drm_sched_fence_create(entity, owner, job);
>  	if (!job->s_fence)
>  		return -ENOMEM;
>  	job->id = atomic64_inc_return(&sched->job_id_count);
> @@ -593,15 +582,25 @@ static void drm_sched_process_job(struct
> dma_fence *f, struct dma_fence_cb *cb)
>  	struct drm_sched_fence *s_fence =
>  		container_of(cb, struct drm_sched_fence, cb);
>  	struct drm_gpu_scheduler *sched = s_fence->sched;
> +	struct drm_sched_job *s_job = s_fence->s_job;


Seems we are back to the original approach; Christian argued that s_fence and s_job have different lifetimes. Do their lifetimes become the same now?

-David

> +	unsigned long flags;
> +
> +	cancel_delayed_work(&sched->work_tdr);
> 
> -	dma_fence_get(&s_fence->finished);
>  	atomic_dec(&sched->hw_rq_count);
>  	atomic_dec(&sched->num_jobs);
> +
> +	spin_lock_irqsave(&sched->job_list_lock, flags);
> +	/* remove job from ring_mirror_list */
> +	list_del_init(&s_job->node);
> +	spin_unlock_irqrestore(&sched->job_list_lock, flags);
> +
>  	drm_sched_fence_finished(s_fence);
> 
>  	trace_drm_sched_process_job(s_fence);
> -	dma_fence_put(&s_fence->finished);
>  	wake_up_interruptible(&sched->wake_up_worker);
> +
> +	schedule_work(&s_job->finish_work);
>  }
> 
>  /**
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index c94b592..23855c6 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -115,6 +115,8 @@ struct drm_sched_rq {
>  	struct drm_sched_entity		*current_entity;
>  };
> 
> +struct drm_sched_job;
> +
>  /**
>   * struct drm_sched_fence - fences corresponding to the scheduling of a job.
>   */
> @@ -160,6 +162,9 @@ struct drm_sched_fence {
>           * @owner: job owner for debugging
>           */
>  	void				*owner;
> +
> +	/* Back pointer to owning job */
> +	struct drm_sched_job 		*s_job;
>  };
> 
>  struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); @@
> -330,8 +335,9 @@ void drm_sched_entity_set_priority(struct
> drm_sched_entity *entity,
>  				   enum drm_sched_priority priority);  bool
> drm_sched_entity_is_ready(struct drm_sched_entity *entity);
> 
> -struct drm_sched_fence *drm_sched_fence_create(
> -	struct drm_sched_entity *s_entity, void *owner);
> +struct drm_sched_fence *drm_sched_fence_create(struct
> drm_sched_entity *s_entity,
> +					       void *owner,
> +					       struct drm_sched_job *s_job);
>  void drm_sched_fence_scheduled(struct drm_sched_fence *fence);  void
> drm_sched_fence_finished(struct drm_sched_fence *fence);
> 
> --
> 2.7.4
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Christian König Dec. 7, 2018, 8:19 a.m. UTC | #3
Am 07.12.18 um 04:18 schrieb Zhou, David(ChunMing):
>
>> -----Original Message-----
>> From: dri-devel <dri-devel-bounces@lists.freedesktop.org> On Behalf Of
>> Andrey Grodzovsky
>> Sent: Friday, December 07, 2018 1:41 AM
>> To: dri-devel@lists.freedesktop.org; amd-gfx@lists.freedesktop.org;
>> ckoenig.leichtzumerken@gmail.com; eric@anholt.net;
>> etnaviv@lists.freedesktop.org
>> Cc: Liu, Monk <Monk.Liu@amd.com>
>> Subject: [PATCH 2/2] drm/sched: Rework HW fence processing.
>>
>> Expedite job deletion from ring mirror list to the HW fence signal callback
>> instead from finish_work, together with waiting for all such fences to signal in
>> drm_sched_stop we garantee that already signaled job will not be processed
>> twice.
>> Remove the sched finish fence callback and just submit finish_work directly
>> from the HW fence callback.
>>
>> Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>>   drivers/gpu/drm/scheduler/sched_fence.c |  4 +++-
>> drivers/gpu/drm/scheduler/sched_main.c  | 39 ++++++++++++++++----------
>> -------
>>   include/drm/gpu_scheduler.h             | 10 +++++++--
>>   3 files changed, 30 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c
>> b/drivers/gpu/drm/scheduler/sched_fence.c
>> index d8d2dff..e62c239 100644
>> --- a/drivers/gpu/drm/scheduler/sched_fence.c
>> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
>> @@ -151,7 +151,8 @@ struct drm_sched_fence
>> *to_drm_sched_fence(struct dma_fence *f)
>> EXPORT_SYMBOL(to_drm_sched_fence);
>>
>>   struct drm_sched_fence *drm_sched_fence_create(struct
>> drm_sched_entity *entity,
>> -					       void *owner)
>> +					       void *owner,
>> +					       struct drm_sched_job *s_job)
>>   {
>>   	struct drm_sched_fence *fence = NULL;
>>   	unsigned seq;
>> @@ -163,6 +164,7 @@ struct drm_sched_fence
>> *drm_sched_fence_create(struct drm_sched_entity *entity,
>>   	fence->owner = owner;
>>   	fence->sched = entity->rq->sched;
>>   	spin_lock_init(&fence->lock);
>> +	fence->s_job = s_job;
>>
>>   	seq = atomic_inc_return(&entity->fence_seq);
>>   	dma_fence_init(&fence->scheduled,
>> &drm_sched_fence_ops_scheduled, diff --git
>> a/drivers/gpu/drm/scheduler/sched_main.c
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index 8fb7f86..2860037 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -284,31 +284,17 @@ static void drm_sched_job_finish(struct
>> work_struct *work)
>>   	cancel_delayed_work_sync(&sched->work_tdr);
>>
>>   	spin_lock_irqsave(&sched->job_list_lock, flags);
>> -	/* remove job from ring_mirror_list */
>> -	list_del_init(&s_job->node);
>> -	/* queue TDR for next job */
>>   	drm_sched_start_timeout(sched);
>>   	spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>
>>   	sched->ops->free_job(s_job);
>>   }
>>
>> -static void drm_sched_job_finish_cb(struct dma_fence *f,
>> -				    struct dma_fence_cb *cb)
>> -{
>> -	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
>> -						 finish_cb);
>> -	schedule_work(&job->finish_work);
>> -}
>> -
>>   static void drm_sched_job_begin(struct drm_sched_job *s_job)  {
>>   	struct drm_gpu_scheduler *sched = s_job->sched;
>>   	unsigned long flags;
>>
>> -	dma_fence_add_callback(&s_job->s_fence->finished, &s_job-
>>> finish_cb,
>> -			       drm_sched_job_finish_cb);
>> -
>>   	spin_lock_irqsave(&sched->job_list_lock, flags);
>>   	list_add_tail(&s_job->node, &sched->ring_mirror_list);
>>   	drm_sched_start_timeout(sched);
>> @@ -418,13 +404,17 @@ void drm_sched_start(struct drm_gpu_scheduler
>> *sched, bool unpark_only)  {
>>   	struct drm_sched_job *s_job, *tmp;
>>   	bool found_guilty = false;
>> -	unsigned long flags;
>>   	int r;
>>
>>   	if (unpark_only)
>>   		goto unpark;
>>
>> -	spin_lock_irqsave(&sched->job_list_lock, flags);
>> +	/*
>> +	 * Locking the list is not required here as the sched thread is parked
>> +	 * so no new jobs are being pushed in to HW and in drm_sched_stop
>> we
>> +	 * flushed any in flight jobs who didn't signal yet. Also concurrent
>> +	 * GPU recovers can't run in parallel.
>> +	 */
>>   	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list,
>> node) {
>>   		struct drm_sched_fence *s_fence = s_job->s_fence;
>>   		struct dma_fence *fence;
>> @@ -453,7 +443,6 @@ void drm_sched_start(struct drm_gpu_scheduler
>> *sched, bool unpark_only)
>>   	}
>>
>>   	drm_sched_start_timeout(sched);
>> -	spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>
>>   unpark:
>>   	kthread_unpark(sched->thread);
>> @@ -505,7 +494,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
>>   	job->sched = sched;
>>   	job->entity = entity;
>>   	job->s_priority = entity->rq - sched->sched_rq;
>> -	job->s_fence = drm_sched_fence_create(entity, owner);
>> +	job->s_fence = drm_sched_fence_create(entity, owner, job);
>>   	if (!job->s_fence)
>>   		return -ENOMEM;
>>   	job->id = atomic64_inc_return(&sched->job_id_count);
>> @@ -593,15 +582,25 @@ static void drm_sched_process_job(struct
>> dma_fence *f, struct dma_fence_cb *cb)
>>   	struct drm_sched_fence *s_fence =
>>   		container_of(cb, struct drm_sched_fence, cb);
>>   	struct drm_gpu_scheduler *sched = s_fence->sched;
>> +	struct drm_sched_job *s_job = s_fence->s_job;
>
> Seems we are back to original, Christian argued s_fence and s_job are with different lifetime , Do their lifetime become same now?

No, that still doesn't work. The scheduler fence lives much longer than 
the job, so we would have a dangling pointer here.

The real question is why we actually need that? I mean we just need to 
move the callback structure into the job, don't we?

Christian.
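
To make that concrete, a minimal sketch of the alternative (illustrative
only, not taken from the posted patch): embed the dma_fence_cb used for
the HW fence in drm_sched_job instead of adding a fence->job back
pointer, and recover the job with container_of() in the callback.

struct drm_sched_job {
	/* ... existing members ... */
	struct dma_fence_cb		cb;	/* registered on the HW fence instead of s_fence->cb */
};

static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
	struct drm_sched_fence *s_fence = s_job->s_fence;	/* the job holds its s_fence */
	struct drm_gpu_scheduler *sched = s_fence->sched;

	/* same cleanup as in the patch, but without the fence->s_job back pointer */
}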

>
> -David
>
>> +	unsigned long flags;
>> +
>> +	cancel_delayed_work(&sched->work_tdr);
>>
>> -	dma_fence_get(&s_fence->finished);
>>   	atomic_dec(&sched->hw_rq_count);
>>   	atomic_dec(&sched->num_jobs);
>> +
>> +	spin_lock_irqsave(&sched->job_list_lock, flags);
>> +	/* remove job from ring_mirror_list */
>> +	list_del_init(&s_job->node);
>> +	spin_unlock_irqrestore(&sched->job_list_lock, flags);
>> +
>>   	drm_sched_fence_finished(s_fence);
>>
>>   	trace_drm_sched_process_job(s_fence);
>> -	dma_fence_put(&s_fence->finished);
>>   	wake_up_interruptible(&sched->wake_up_worker);
>> +
>> +	schedule_work(&s_job->finish_work);
>>   }
>>
>>   /**
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index c94b592..23855c6 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -115,6 +115,8 @@ struct drm_sched_rq {
>>   	struct drm_sched_entity		*current_entity;
>>   };
>>
>> +struct drm_sched_job;
>> +
>>   /**
>>    * struct drm_sched_fence - fences corresponding to the scheduling of a job.
>>    */
>> @@ -160,6 +162,9 @@ struct drm_sched_fence {
>>            * @owner: job owner for debugging
>>            */
>>   	void				*owner;
>> +
>> +	/* Back pointer to owning job */
>> +	struct drm_sched_job 		*s_job;
>>   };
>>
>>   struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); @@
>> -330,8 +335,9 @@ void drm_sched_entity_set_priority(struct
>> drm_sched_entity *entity,
>>   				   enum drm_sched_priority priority);  bool
>> drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>>
>> -struct drm_sched_fence *drm_sched_fence_create(
>> -	struct drm_sched_entity *s_entity, void *owner);
>> +struct drm_sched_fence *drm_sched_fence_create(struct
>> drm_sched_entity *s_entity,
>> +					       void *owner,
>> +					       struct drm_sched_job *s_job);
>>   void drm_sched_fence_scheduled(struct drm_sched_fence *fence);  void
>> drm_sched_fence_finished(struct drm_sched_fence *fence);
>>
>> --
>> 2.7.4
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Andrey Grodzovsky Dec. 7, 2018, 3:22 p.m. UTC | #4
On 12/07/2018 03:19 AM, Christian König wrote:
> Am 07.12.18 um 04:18 schrieb Zhou, David(ChunMing):
>>
>>> -----Original Message-----
>>> From: dri-devel <dri-devel-bounces@lists.freedesktop.org> On Behalf Of
>>> Andrey Grodzovsky
>>> Sent: Friday, December 07, 2018 1:41 AM
>>> To: dri-devel@lists.freedesktop.org; amd-gfx@lists.freedesktop.org;
>>> ckoenig.leichtzumerken@gmail.com; eric@anholt.net;
>>> etnaviv@lists.freedesktop.org
>>> Cc: Liu, Monk <Monk.Liu@amd.com>
>>> Subject: [PATCH 2/2] drm/sched: Rework HW fence processing.
>>>
>>> Expedite job deletion from ring mirror list to the HW fence signal 
>>> callback
>>> instead from finish_work, together with waiting for all such fences 
>>> to signal in
>>> drm_sched_stop we garantee that already signaled job will not be 
>>> processed
>>> twice.
>>> Remove the sched finish fence callback and just submit finish_work 
>>> directly
>>> from the HW fence callback.
>>>
>>> Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>> ---
>>>   drivers/gpu/drm/scheduler/sched_fence.c |  4 +++-
>>> drivers/gpu/drm/scheduler/sched_main.c  | 39 ++++++++++++++++----------
>>> -------
>>>   include/drm/gpu_scheduler.h             | 10 +++++++--
>>>   3 files changed, 30 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c
>>> b/drivers/gpu/drm/scheduler/sched_fence.c
>>> index d8d2dff..e62c239 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_fence.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
>>> @@ -151,7 +151,8 @@ struct drm_sched_fence
>>> *to_drm_sched_fence(struct dma_fence *f)
>>> EXPORT_SYMBOL(to_drm_sched_fence);
>>>
>>>   struct drm_sched_fence *drm_sched_fence_create(struct
>>> drm_sched_entity *entity,
>>> -                           void *owner)
>>> +                           void *owner,
>>> +                           struct drm_sched_job *s_job)
>>>   {
>>>       struct drm_sched_fence *fence = NULL;
>>>       unsigned seq;
>>> @@ -163,6 +164,7 @@ struct drm_sched_fence
>>> *drm_sched_fence_create(struct drm_sched_entity *entity,
>>>       fence->owner = owner;
>>>       fence->sched = entity->rq->sched;
>>>       spin_lock_init(&fence->lock);
>>> +    fence->s_job = s_job;
>>>
>>>       seq = atomic_inc_return(&entity->fence_seq);
>>>       dma_fence_init(&fence->scheduled,
>>> &drm_sched_fence_ops_scheduled, diff --git
>>> a/drivers/gpu/drm/scheduler/sched_main.c
>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>> index 8fb7f86..2860037 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -284,31 +284,17 @@ static void drm_sched_job_finish(struct
>>> work_struct *work)
>>>       cancel_delayed_work_sync(&sched->work_tdr);
>>>
>>>       spin_lock_irqsave(&sched->job_list_lock, flags);
>>> -    /* remove job from ring_mirror_list */
>>> -    list_del_init(&s_job->node);
>>> -    /* queue TDR for next job */
>>>       drm_sched_start_timeout(sched);
>>>       spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>>
>>>       sched->ops->free_job(s_job);
>>>   }
>>>
>>> -static void drm_sched_job_finish_cb(struct dma_fence *f,
>>> -                    struct dma_fence_cb *cb)
>>> -{
>>> -    struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
>>> -                         finish_cb);
>>> -    schedule_work(&job->finish_work);
>>> -}
>>> -
>>>   static void drm_sched_job_begin(struct drm_sched_job *s_job)  {
>>>       struct drm_gpu_scheduler *sched = s_job->sched;
>>>       unsigned long flags;
>>>
>>> - dma_fence_add_callback(&s_job->s_fence->finished, &s_job-
>>>> finish_cb,
>>> -                   drm_sched_job_finish_cb);
>>> -
>>>       spin_lock_irqsave(&sched->job_list_lock, flags);
>>>       list_add_tail(&s_job->node, &sched->ring_mirror_list);
>>>       drm_sched_start_timeout(sched);
>>> @@ -418,13 +404,17 @@ void drm_sched_start(struct drm_gpu_scheduler
>>> *sched, bool unpark_only)  {
>>>       struct drm_sched_job *s_job, *tmp;
>>>       bool found_guilty = false;
>>> -    unsigned long flags;
>>>       int r;
>>>
>>>       if (unpark_only)
>>>           goto unpark;
>>>
>>> -    spin_lock_irqsave(&sched->job_list_lock, flags);
>>> +    /*
>>> +     * Locking the list is not required here as the sched thread is 
>>> parked
>>> +     * so no new jobs are being pushed in to HW and in drm_sched_stop
>>> we
>>> +     * flushed any in flight jobs who didn't signal yet. Also 
>>> concurrent
>>> +     * GPU recovers can't run in parallel.
>>> +     */
>>>       list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list,
>>> node) {
>>>           struct drm_sched_fence *s_fence = s_job->s_fence;
>>>           struct dma_fence *fence;
>>> @@ -453,7 +443,6 @@ void drm_sched_start(struct drm_gpu_scheduler
>>> *sched, bool unpark_only)
>>>       }
>>>
>>>       drm_sched_start_timeout(sched);
>>> -    spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>>
>>>   unpark:
>>>       kthread_unpark(sched->thread);
>>> @@ -505,7 +494,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
>>>       job->sched = sched;
>>>       job->entity = entity;
>>>       job->s_priority = entity->rq - sched->sched_rq;
>>> -    job->s_fence = drm_sched_fence_create(entity, owner);
>>> +    job->s_fence = drm_sched_fence_create(entity, owner, job);
>>>       if (!job->s_fence)
>>>           return -ENOMEM;
>>>       job->id = atomic64_inc_return(&sched->job_id_count);
>>> @@ -593,15 +582,25 @@ static void drm_sched_process_job(struct
>>> dma_fence *f, struct dma_fence_cb *cb)
>>>       struct drm_sched_fence *s_fence =
>>>           container_of(cb, struct drm_sched_fence, cb);
>>>       struct drm_gpu_scheduler *sched = s_fence->sched;
>>> +    struct drm_sched_job *s_job = s_fence->s_job;
>>
>> Seems we are back to original, Christian argued s_fence and s_job are 
>> with different lifetime , Do their lifetime become same now?
>
> No, that still doesn't work. The scheduler fence lives much longer 
> than the job, so we would have a dangling pointer here.
>
> The real question is why we actually need that? I mean we just need to 
> move the callback structure into the job, don't we?
>
> Christian.

So let's say I move the .cb from drm_sched_fence to drm_sched_job; then
it's OK to reference job->s_fence there because s_fence lives at least
as long as the job, right?

Andrey

>
>>
>> -David
>>
>>> +    unsigned long flags;
>>> +
>>> +    cancel_delayed_work(&sched->work_tdr);
>>>
>>> -    dma_fence_get(&s_fence->finished);
>>>       atomic_dec(&sched->hw_rq_count);
>>>       atomic_dec(&sched->num_jobs);
>>> +
>>> +    spin_lock_irqsave(&sched->job_list_lock, flags);
>>> +    /* remove job from ring_mirror_list */
>>> +    list_del_init(&s_job->node);
>>> +    spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>> +
>>>       drm_sched_fence_finished(s_fence);
>>>
>>>       trace_drm_sched_process_job(s_fence);
>>> -    dma_fence_put(&s_fence->finished);
>>>       wake_up_interruptible(&sched->wake_up_worker);
>>> +
>>> +    schedule_work(&s_job->finish_work);
>>>   }
>>>
>>>   /**
>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>> index c94b592..23855c6 100644
>>> --- a/include/drm/gpu_scheduler.h
>>> +++ b/include/drm/gpu_scheduler.h
>>> @@ -115,6 +115,8 @@ struct drm_sched_rq {
>>>       struct drm_sched_entity        *current_entity;
>>>   };
>>>
>>> +struct drm_sched_job;
>>> +
>>>   /**
>>>    * struct drm_sched_fence - fences corresponding to the scheduling 
>>> of a job.
>>>    */
>>> @@ -160,6 +162,9 @@ struct drm_sched_fence {
>>>            * @owner: job owner for debugging
>>>            */
>>>       void                *owner;
>>> +
>>> +    /* Back pointer to owning job */
>>> +    struct drm_sched_job         *s_job;
>>>   };
>>>
>>>   struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); @@
>>> -330,8 +335,9 @@ void drm_sched_entity_set_priority(struct
>>> drm_sched_entity *entity,
>>>                      enum drm_sched_priority priority);  bool
>>> drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>>>
>>> -struct drm_sched_fence *drm_sched_fence_create(
>>> -    struct drm_sched_entity *s_entity, void *owner);
>>> +struct drm_sched_fence *drm_sched_fence_create(struct
>>> drm_sched_entity *s_entity,
>>> +                           void *owner,
>>> +                           struct drm_sched_job *s_job);
>>>   void drm_sched_fence_scheduled(struct drm_sched_fence *fence);  void
>>> drm_sched_fence_finished(struct drm_sched_fence *fence);
>>>
>>> -- 
>>> 2.7.4
>>>
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
Christian König Dec. 7, 2018, 3:26 p.m. UTC | #5
Am 07.12.18 um 16:22 schrieb Grodzovsky, Andrey:
>
> On 12/07/2018 03:19 AM, Christian König wrote:
>> Am 07.12.18 um 04:18 schrieb Zhou, David(ChunMing):
>>>> -----Original Message-----
>>>> From: dri-devel <dri-devel-bounces@lists.freedesktop.org> On Behalf Of
>>>> Andrey Grodzovsky
>>>> Sent: Friday, December 07, 2018 1:41 AM
>>>> To: dri-devel@lists.freedesktop.org; amd-gfx@lists.freedesktop.org;
>>>> ckoenig.leichtzumerken@gmail.com; eric@anholt.net;
>>>> etnaviv@lists.freedesktop.org
>>>> Cc: Liu, Monk <Monk.Liu@amd.com>
>>>> Subject: [PATCH 2/2] drm/sched: Rework HW fence processing.
>>>>
>>>> Expedite job deletion from ring mirror list to the HW fence signal
>>>> callback
>>>> instead from finish_work, together with waiting for all such fences
>>>> to signal in
>>>> drm_sched_stop we garantee that already signaled job will not be
>>>> processed
>>>> twice.
>>>> Remove the sched finish fence callback and just submit finish_work
>>>> directly
>>>> from the HW fence callback.
>>>>
>>>> Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/scheduler/sched_fence.c |  4 +++-
>>>> drivers/gpu/drm/scheduler/sched_main.c  | 39 ++++++++++++++++----------
>>>> -------
>>>>    include/drm/gpu_scheduler.h             | 10 +++++++--
>>>>    3 files changed, 30 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c
>>>> b/drivers/gpu/drm/scheduler/sched_fence.c
>>>> index d8d2dff..e62c239 100644
>>>> --- a/drivers/gpu/drm/scheduler/sched_fence.c
>>>> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
>>>> @@ -151,7 +151,8 @@ struct drm_sched_fence
>>>> *to_drm_sched_fence(struct dma_fence *f)
>>>> EXPORT_SYMBOL(to_drm_sched_fence);
>>>>
>>>>    struct drm_sched_fence *drm_sched_fence_create(struct
>>>> drm_sched_entity *entity,
>>>> -                           void *owner)
>>>> +                           void *owner,
>>>> +                           struct drm_sched_job *s_job)
>>>>    {
>>>>        struct drm_sched_fence *fence = NULL;
>>>>        unsigned seq;
>>>> @@ -163,6 +164,7 @@ struct drm_sched_fence
>>>> *drm_sched_fence_create(struct drm_sched_entity *entity,
>>>>        fence->owner = owner;
>>>>        fence->sched = entity->rq->sched;
>>>>        spin_lock_init(&fence->lock);
>>>> +    fence->s_job = s_job;
>>>>
>>>>        seq = atomic_inc_return(&entity->fence_seq);
>>>>        dma_fence_init(&fence->scheduled,
>>>> &drm_sched_fence_ops_scheduled, diff --git
>>>> a/drivers/gpu/drm/scheduler/sched_main.c
>>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>>> index 8fb7f86..2860037 100644
>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>> @@ -284,31 +284,17 @@ static void drm_sched_job_finish(struct
>>>> work_struct *work)
>>>>        cancel_delayed_work_sync(&sched->work_tdr);
>>>>
>>>>        spin_lock_irqsave(&sched->job_list_lock, flags);
>>>> -    /* remove job from ring_mirror_list */
>>>> -    list_del_init(&s_job->node);
>>>> -    /* queue TDR for next job */
>>>>        drm_sched_start_timeout(sched);
>>>>        spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>>>
>>>>        sched->ops->free_job(s_job);
>>>>    }
>>>>
>>>> -static void drm_sched_job_finish_cb(struct dma_fence *f,
>>>> -                    struct dma_fence_cb *cb)
>>>> -{
>>>> -    struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
>>>> -                         finish_cb);
>>>> -    schedule_work(&job->finish_work);
>>>> -}
>>>> -
>>>>    static void drm_sched_job_begin(struct drm_sched_job *s_job)  {
>>>>        struct drm_gpu_scheduler *sched = s_job->sched;
>>>>        unsigned long flags;
>>>>
>>>> - dma_fence_add_callback(&s_job->s_fence->finished, &s_job-
>>>>> finish_cb,
>>>> -                   drm_sched_job_finish_cb);
>>>> -
>>>>        spin_lock_irqsave(&sched->job_list_lock, flags);
>>>>        list_add_tail(&s_job->node, &sched->ring_mirror_list);
>>>>        drm_sched_start_timeout(sched);
>>>> @@ -418,13 +404,17 @@ void drm_sched_start(struct drm_gpu_scheduler
>>>> *sched, bool unpark_only)  {
>>>>        struct drm_sched_job *s_job, *tmp;
>>>>        bool found_guilty = false;
>>>> -    unsigned long flags;
>>>>        int r;
>>>>
>>>>        if (unpark_only)
>>>>            goto unpark;
>>>>
>>>> -    spin_lock_irqsave(&sched->job_list_lock, flags);
>>>> +    /*
>>>> +     * Locking the list is not required here as the sched thread is
>>>> parked
>>>> +     * so no new jobs are being pushed in to HW and in drm_sched_stop
>>>> we
>>>> +     * flushed any in flight jobs who didn't signal yet. Also
>>>> concurrent
>>>> +     * GPU recovers can't run in parallel.
>>>> +     */
>>>>        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list,
>>>> node) {
>>>>            struct drm_sched_fence *s_fence = s_job->s_fence;
>>>>            struct dma_fence *fence;
>>>> @@ -453,7 +443,6 @@ void drm_sched_start(struct drm_gpu_scheduler
>>>> *sched, bool unpark_only)
>>>>        }
>>>>
>>>>        drm_sched_start_timeout(sched);
>>>> -    spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>>>
>>>>    unpark:
>>>>        kthread_unpark(sched->thread);
>>>> @@ -505,7 +494,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
>>>>        job->sched = sched;
>>>>        job->entity = entity;
>>>>        job->s_priority = entity->rq - sched->sched_rq;
>>>> -    job->s_fence = drm_sched_fence_create(entity, owner);
>>>> +    job->s_fence = drm_sched_fence_create(entity, owner, job);
>>>>        if (!job->s_fence)
>>>>            return -ENOMEM;
>>>>        job->id = atomic64_inc_return(&sched->job_id_count);
>>>> @@ -593,15 +582,25 @@ static void drm_sched_process_job(struct
>>>> dma_fence *f, struct dma_fence_cb *cb)
>>>>        struct drm_sched_fence *s_fence =
>>>>            container_of(cb, struct drm_sched_fence, cb);
>>>>        struct drm_gpu_scheduler *sched = s_fence->sched;
>>>> +    struct drm_sched_job *s_job = s_fence->s_job;
>>> Seems we are back to original, Christian argued s_fence and s_job are
>>> with different lifetime , Do their lifetime become same now?
>> No, that still doesn't work. The scheduler fence lives much longer
>> than the job, so we would have a dangling pointer here.
>>
>> The real question is why we actually need that? I mean we just need to
>> move the callback structure into the job, don't we?
>>
>> Christian.
> So let's say I move the .cb from drm_sched_fence to drm_sched_job, and
> there it's ok to reference
> job->s_fence because s_fence life as at least as the job, right ?

Exactly. Well, we could actually make the life of s_fence a bit shorter
and drop it as soon as it is signaled.

Christian.
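
A rough sketch of that variant (illustrative only, not part of this
series): drop the job's reference on the scheduler fence right after it
signals, instead of keeping it until free_job; this assumes free_job no
longer touches job->s_fence afterwards.

	drm_sched_fence_finished(s_fence);

	/* Drop the reference the job holds on its scheduler fence now that
	 * it has signaled; free_job must not dereference s_job->s_fence
	 * after this point.
	 */
	dma_fence_put(&s_fence->finished);
	s_job->s_fence = NULL;

	schedule_work(&s_job->finish_work);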

>
> Andrey
>
>>> -David
>>>
>>>> +    unsigned long flags;
>>>> +
>>>> +    cancel_delayed_work(&sched->work_tdr);
>>>>
>>>> -    dma_fence_get(&s_fence->finished);
>>>>        atomic_dec(&sched->hw_rq_count);
>>>>        atomic_dec(&sched->num_jobs);
>>>> +
>>>> +    spin_lock_irqsave(&sched->job_list_lock, flags);
>>>> +    /* remove job from ring_mirror_list */
>>>> +    list_del_init(&s_job->node);
>>>> +    spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>>> +
>>>>        drm_sched_fence_finished(s_fence);
>>>>
>>>>        trace_drm_sched_process_job(s_fence);
>>>> -    dma_fence_put(&s_fence->finished);
>>>>        wake_up_interruptible(&sched->wake_up_worker);
>>>> +
>>>> +    schedule_work(&s_job->finish_work);
>>>>    }
>>>>
>>>>    /**
>>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>>> index c94b592..23855c6 100644
>>>> --- a/include/drm/gpu_scheduler.h
>>>> +++ b/include/drm/gpu_scheduler.h
>>>> @@ -115,6 +115,8 @@ struct drm_sched_rq {
>>>>        struct drm_sched_entity        *current_entity;
>>>>    };
>>>>
>>>> +struct drm_sched_job;
>>>> +
>>>>    /**
>>>>     * struct drm_sched_fence - fences corresponding to the scheduling
>>>> of a job.
>>>>     */
>>>> @@ -160,6 +162,9 @@ struct drm_sched_fence {
>>>>             * @owner: job owner for debugging
>>>>             */
>>>>        void                *owner;
>>>> +
>>>> +    /* Back pointer to owning job */
>>>> +    struct drm_sched_job         *s_job;
>>>>    };
>>>>
>>>>    struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); @@
>>>> -330,8 +335,9 @@ void drm_sched_entity_set_priority(struct
>>>> drm_sched_entity *entity,
>>>>                       enum drm_sched_priority priority);  bool
>>>> drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>>>>
>>>> -struct drm_sched_fence *drm_sched_fence_create(
>>>> -    struct drm_sched_entity *s_entity, void *owner);
>>>> +struct drm_sched_fence *drm_sched_fence_create(struct
>>>> drm_sched_entity *s_entity,
>>>> +                           void *owner,
>>>> +                           struct drm_sched_job *s_job);
>>>>    void drm_sched_fence_scheduled(struct drm_sched_fence *fence);  void
>>>> drm_sched_fence_finished(struct drm_sched_fence *fence);
>>>>
>>>> -- 
>>>> 2.7.4
>>>>
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel

Patch

diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
index d8d2dff..e62c239 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -151,7 +151,8 @@  struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
 EXPORT_SYMBOL(to_drm_sched_fence);
 
 struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
-					       void *owner)
+					       void *owner,
+					       struct drm_sched_job *s_job)
 {
 	struct drm_sched_fence *fence = NULL;
 	unsigned seq;
@@ -163,6 +164,7 @@  struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
 	fence->owner = owner;
 	fence->sched = entity->rq->sched;
 	spin_lock_init(&fence->lock);
+	fence->s_job = s_job;
 
 	seq = atomic_inc_return(&entity->fence_seq);
 	dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled,
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 8fb7f86..2860037 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -284,31 +284,17 @@  static void drm_sched_job_finish(struct work_struct *work)
 	cancel_delayed_work_sync(&sched->work_tdr);
 
 	spin_lock_irqsave(&sched->job_list_lock, flags);
-	/* remove job from ring_mirror_list */
-	list_del_init(&s_job->node);
-	/* queue TDR for next job */
 	drm_sched_start_timeout(sched);
 	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
 	sched->ops->free_job(s_job);
 }
 
-static void drm_sched_job_finish_cb(struct dma_fence *f,
-				    struct dma_fence_cb *cb)
-{
-	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
-						 finish_cb);
-	schedule_work(&job->finish_work);
-}
-
 static void drm_sched_job_begin(struct drm_sched_job *s_job)
 {
 	struct drm_gpu_scheduler *sched = s_job->sched;
 	unsigned long flags;
 
-	dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
-			       drm_sched_job_finish_cb);
-
 	spin_lock_irqsave(&sched->job_list_lock, flags);
 	list_add_tail(&s_job->node, &sched->ring_mirror_list);
 	drm_sched_start_timeout(sched);
@@ -418,13 +404,17 @@  void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
 {
 	struct drm_sched_job *s_job, *tmp;
 	bool found_guilty = false;
-	unsigned long flags;
 	int r;
 
 	if (unpark_only)
 		goto unpark;
 
-	spin_lock_irqsave(&sched->job_list_lock, flags);	
+	/*
+	 * Locking the list is not required here as the sched thread is parked
+	 * so no new jobs are being pushed in to HW and in drm_sched_stop we
+	 * flushed any in flight jobs who didn't signal yet. Also concurrent
+	 * GPU recovers can't run in parallel.
+	 */
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct drm_sched_fence *s_fence = s_job->s_fence;
 		struct dma_fence *fence;
@@ -453,7 +443,6 @@  void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
 	}
 
 	drm_sched_start_timeout(sched);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
 unpark:
 	kthread_unpark(sched->thread);
@@ -505,7 +494,7 @@  int drm_sched_job_init(struct drm_sched_job *job,
 	job->sched = sched;
 	job->entity = entity;
 	job->s_priority = entity->rq - sched->sched_rq;
-	job->s_fence = drm_sched_fence_create(entity, owner);
+	job->s_fence = drm_sched_fence_create(entity, owner, job);
 	if (!job->s_fence)
 		return -ENOMEM;
 	job->id = atomic64_inc_return(&sched->job_id_count);
@@ -593,15 +582,25 @@  static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 	struct drm_sched_fence *s_fence =
 		container_of(cb, struct drm_sched_fence, cb);
 	struct drm_gpu_scheduler *sched = s_fence->sched;
+	struct drm_sched_job *s_job = s_fence->s_job;
+	unsigned long flags;
+
+	cancel_delayed_work(&sched->work_tdr);
 
-	dma_fence_get(&s_fence->finished);
 	atomic_dec(&sched->hw_rq_count);
 	atomic_dec(&sched->num_jobs);
+
+	spin_lock_irqsave(&sched->job_list_lock, flags);
+	/* remove job from ring_mirror_list */
+	list_del_init(&s_job->node);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
 	drm_sched_fence_finished(s_fence);
 
 	trace_drm_sched_process_job(s_fence);
-	dma_fence_put(&s_fence->finished);
 	wake_up_interruptible(&sched->wake_up_worker);
+
+	schedule_work(&s_job->finish_work);
 }
 
 /**
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index c94b592..23855c6 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -115,6 +115,8 @@  struct drm_sched_rq {
 	struct drm_sched_entity		*current_entity;
 };
 
+struct drm_sched_job;
+
 /**
  * struct drm_sched_fence - fences corresponding to the scheduling of a job.
  */
@@ -160,6 +162,9 @@  struct drm_sched_fence {
          * @owner: job owner for debugging
          */
 	void				*owner;
+
+	/* Back pointer to owning job */
+	struct drm_sched_job 		*s_job;
 };
 
 struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
@@ -330,8 +335,9 @@  void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
 				   enum drm_sched_priority priority);
 bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
 
-struct drm_sched_fence *drm_sched_fence_create(
-	struct drm_sched_entity *s_entity, void *owner);
+struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *s_entity,
+					       void *owner,
+					       struct drm_sched_job *s_job);
 void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
 void drm_sched_fence_finished(struct drm_sched_fence *fence);