diff mbox series

[v3,4/9] drm/scheduler: Add fence deadline support

Message ID 20210903184806.1680887-5-robdclark@gmail.com (mailing list archive)
State New, archived
Headers show
Series dma-fence: Deadline awareness | expand

Commit Message

Rob Clark Sept. 3, 2021, 6:47 p.m. UTC
From: Rob Clark <robdclark@chromium.org>

As the finished fence is the one that is exposed to userspace, and
therefore the one that other operations, like atomic update, would
block on, we need to propagate the deadline from from the finished
fence to the actual hw fence.

v2: Split into drm_sched_fence_set_parent() (ckoenig)

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++
 drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
 include/drm/gpu_scheduler.h             |  8 ++++++
 3 files changed, 43 insertions(+), 1 deletion(-)

Comments

Daniel Vetter Sept. 8, 2021, 5:45 p.m. UTC | #1
On Fri, Sep 03, 2021 at 11:47:55AM -0700, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
> 
> As the finished fence is the one that is exposed to userspace, and
> therefore the one that other operations, like atomic update, would
> block on, we need to propagate the deadline from from the finished
> fence to the actual hw fence.
> 
> v2: Split into drm_sched_fence_set_parent() (ckoenig)
> 
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
>  drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++
>  drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
>  include/drm/gpu_scheduler.h             |  8 ++++++
>  3 files changed, 43 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> index bcea035cf4c6..4fc41a71d1c7 100644
> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
>  	dma_fence_put(&fence->scheduled);
>  }
>  
> +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
> +						  ktime_t deadline)
> +{
> +	struct drm_sched_fence *fence = to_drm_sched_fence(f);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&fence->lock, flags);
> +
> +	/* If we already have an earlier deadline, keep it: */
> +	if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
> +	    ktime_before(fence->deadline, deadline)) {
> +		spin_unlock_irqrestore(&fence->lock, flags);
> +		return;
> +	}
> +
> +	fence->deadline = deadline;
> +	set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
> +
> +	spin_unlock_irqrestore(&fence->lock, flags);
> +
> +	if (fence->parent)
> +		dma_fence_set_deadline(fence->parent, deadline);
> +}
> +
>  static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
>  	.get_driver_name = drm_sched_fence_get_driver_name,
>  	.get_timeline_name = drm_sched_fence_get_timeline_name,
> @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
>  	.get_driver_name = drm_sched_fence_get_driver_name,
>  	.get_timeline_name = drm_sched_fence_get_timeline_name,
>  	.release = drm_sched_fence_release_finished,
> +	.set_deadline = drm_sched_fence_set_deadline_finished,
>  };
>  
>  struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
>  }
>  EXPORT_SYMBOL(to_drm_sched_fence);
>  
> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> +				struct dma_fence *fence)
> +{
> +	s_fence->parent = dma_fence_get(fence);
> +	if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> +		     &s_fence->finished.flags))

Don't you need the spinlock here too to avoid races? test_bit is very
unordered, so guarantees nothing. Spinlock would need to be both around
->parent = and the test_bit.

Entirely aside, but there's discussions going on to preallocate the hw
fence somehow. If we do that we could make the deadline forwarding
lockless here. Having a spinlock just to set the parent is a bit annoying
...

Alternative is that you do this locklessly with barriers and a _lot_ of
comments. Would be good to benchmark whether the overhead matters though
first.
-Daniel

> +		dma_fence_set_deadline(fence, s_fence->deadline);
> +}
> +
>  struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
>  					      void *owner)
>  {
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 595e47ff7d06..27bf0ac0625f 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -978,7 +978,7 @@ static int drm_sched_main(void *param)
>  		drm_sched_fence_scheduled(s_fence);
>  
>  		if (!IS_ERR_OR_NULL(fence)) {
> -			s_fence->parent = dma_fence_get(fence);
> +			drm_sched_fence_set_parent(s_fence, fence);
>  			r = dma_fence_add_callback(fence, &sched_job->cb,
>  						   drm_sched_job_done_cb);
>  			if (r == -ENOENT)
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 7f77a455722c..158ddd662469 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -238,6 +238,12 @@ struct drm_sched_fence {
>           */
>  	struct dma_fence		finished;
>  
> +	/**
> +	 * @deadline: deadline set on &drm_sched_fence.finished which
> +	 * potentially needs to be propagated to &drm_sched_fence.parent
> +	 */
> +	ktime_t				deadline;
> +
>          /**
>           * @parent: the fence returned by &drm_sched_backend_ops.run_job
>           * when scheduling the job on hardware. We signal the
> @@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
>  				   enum drm_sched_priority priority);
>  bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>  
> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> +				struct dma_fence *fence);
>  struct drm_sched_fence *drm_sched_fence_alloc(
>  	struct drm_sched_entity *s_entity, void *owner);
>  void drm_sched_fence_init(struct drm_sched_fence *fence,
> -- 
> 2.31.1
>
Christian König Sept. 9, 2021, 6:22 a.m. UTC | #2
Am 08.09.21 um 19:45 schrieb Daniel Vetter:
> On Fri, Sep 03, 2021 at 11:47:55AM -0700, Rob Clark wrote:
>> From: Rob Clark <robdclark@chromium.org>
>>
>> As the finished fence is the one that is exposed to userspace, and
>> therefore the one that other operations, like atomic update, would
>> block on, we need to propagate the deadline from from the finished
>> fence to the actual hw fence.
>>
>> v2: Split into drm_sched_fence_set_parent() (ckoenig)
>>
>> Signed-off-by: Rob Clark <robdclark@chromium.org>
>> ---
>>   drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++
>>   drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
>>   include/drm/gpu_scheduler.h             |  8 ++++++
>>   3 files changed, 43 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
>> index bcea035cf4c6..4fc41a71d1c7 100644
>> --- a/drivers/gpu/drm/scheduler/sched_fence.c
>> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
>> @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
>>   	dma_fence_put(&fence->scheduled);
>>   }
>>   
>> +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
>> +						  ktime_t deadline)
>> +{
>> +	struct drm_sched_fence *fence = to_drm_sched_fence(f);
>> +	unsigned long flags;
>> +
>> +	spin_lock_irqsave(&fence->lock, flags);
>> +
>> +	/* If we already have an earlier deadline, keep it: */
>> +	if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
>> +	    ktime_before(fence->deadline, deadline)) {
>> +		spin_unlock_irqrestore(&fence->lock, flags);
>> +		return;
>> +	}
>> +
>> +	fence->deadline = deadline;
>> +	set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
>> +
>> +	spin_unlock_irqrestore(&fence->lock, flags);
>> +
>> +	if (fence->parent)
>> +		dma_fence_set_deadline(fence->parent, deadline);
>> +}
>> +
>>   static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
>>   	.get_driver_name = drm_sched_fence_get_driver_name,
>>   	.get_timeline_name = drm_sched_fence_get_timeline_name,
>> @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
>>   	.get_driver_name = drm_sched_fence_get_driver_name,
>>   	.get_timeline_name = drm_sched_fence_get_timeline_name,
>>   	.release = drm_sched_fence_release_finished,
>> +	.set_deadline = drm_sched_fence_set_deadline_finished,
>>   };
>>   
>>   struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
>> @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
>>   }
>>   EXPORT_SYMBOL(to_drm_sched_fence);
>>   
>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
>> +				struct dma_fence *fence)
>> +{
>> +	s_fence->parent = dma_fence_get(fence);
>> +	if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
>> +		     &s_fence->finished.flags))
> Don't you need the spinlock here too to avoid races? test_bit is very
> unordered, so guarantees nothing. Spinlock would need to be both around
> ->parent = and the test_bit.
>
> Entirely aside, but there's discussions going on to preallocate the hw
> fence somehow. If we do that we could make the deadline forwarding
> lockless here. Having a spinlock just to set the parent is a bit annoying

Well previously we didn't needed the spinlock to set the parent because 
the parent wasn't used outside of the main thread.

This becomes an issue now because we can race with setting the deadline. 
So yeah we probably do need the spinlock here now.

Christian.

> ...
>
> Alternative is that you do this locklessly with barriers and a _lot_ of
> comments. Would be good to benchmark whether the overhead matters though
> first.
> -Daniel
>
>> +		dma_fence_set_deadline(fence, s_fence->deadline);
>> +}
>> +
>>   struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
>>   					      void *owner)
>>   {
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>> index 595e47ff7d06..27bf0ac0625f 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -978,7 +978,7 @@ static int drm_sched_main(void *param)
>>   		drm_sched_fence_scheduled(s_fence);
>>   
>>   		if (!IS_ERR_OR_NULL(fence)) {
>> -			s_fence->parent = dma_fence_get(fence);
>> +			drm_sched_fence_set_parent(s_fence, fence);
>>   			r = dma_fence_add_callback(fence, &sched_job->cb,
>>   						   drm_sched_job_done_cb);
>>   			if (r == -ENOENT)
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index 7f77a455722c..158ddd662469 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -238,6 +238,12 @@ struct drm_sched_fence {
>>            */
>>   	struct dma_fence		finished;
>>   
>> +	/**
>> +	 * @deadline: deadline set on &drm_sched_fence.finished which
>> +	 * potentially needs to be propagated to &drm_sched_fence.parent
>> +	 */
>> +	ktime_t				deadline;
>> +
>>           /**
>>            * @parent: the fence returned by &drm_sched_backend_ops.run_job
>>            * when scheduling the job on hardware. We signal the
>> @@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
>>   				   enum drm_sched_priority priority);
>>   bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>>   
>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
>> +				struct dma_fence *fence);
>>   struct drm_sched_fence *drm_sched_fence_alloc(
>>   	struct drm_sched_entity *s_entity, void *owner);
>>   void drm_sched_fence_init(struct drm_sched_fence *fence,
>> -- 
>> 2.31.1
>>
Daniel Vetter Sept. 14, 2021, 1:38 p.m. UTC | #3
On Thu, Sep 09, 2021 at 08:22:59AM +0200, Christian König wrote:
> Am 08.09.21 um 19:45 schrieb Daniel Vetter:
> > On Fri, Sep 03, 2021 at 11:47:55AM -0700, Rob Clark wrote:
> > > From: Rob Clark <robdclark@chromium.org>
> > > 
> > > As the finished fence is the one that is exposed to userspace, and
> > > therefore the one that other operations, like atomic update, would
> > > block on, we need to propagate the deadline from from the finished
> > > fence to the actual hw fence.
> > > 
> > > v2: Split into drm_sched_fence_set_parent() (ckoenig)
> > > 
> > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > ---
> > >   drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++
> > >   drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
> > >   include/drm/gpu_scheduler.h             |  8 ++++++
> > >   3 files changed, 43 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> > > index bcea035cf4c6..4fc41a71d1c7 100644
> > > --- a/drivers/gpu/drm/scheduler/sched_fence.c
> > > +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> > > @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
> > >   	dma_fence_put(&fence->scheduled);
> > >   }
> > > +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
> > > +						  ktime_t deadline)
> > > +{
> > > +	struct drm_sched_fence *fence = to_drm_sched_fence(f);
> > > +	unsigned long flags;
> > > +
> > > +	spin_lock_irqsave(&fence->lock, flags);
> > > +
> > > +	/* If we already have an earlier deadline, keep it: */
> > > +	if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
> > > +	    ktime_before(fence->deadline, deadline)) {
> > > +		spin_unlock_irqrestore(&fence->lock, flags);
> > > +		return;
> > > +	}
> > > +
> > > +	fence->deadline = deadline;
> > > +	set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
> > > +
> > > +	spin_unlock_irqrestore(&fence->lock, flags);
> > > +
> > > +	if (fence->parent)
> > > +		dma_fence_set_deadline(fence->parent, deadline);
> > > +}
> > > +
> > >   static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
> > >   	.get_driver_name = drm_sched_fence_get_driver_name,
> > >   	.get_timeline_name = drm_sched_fence_get_timeline_name,
> > > @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
> > >   	.get_driver_name = drm_sched_fence_get_driver_name,
> > >   	.get_timeline_name = drm_sched_fence_get_timeline_name,
> > >   	.release = drm_sched_fence_release_finished,
> > > +	.set_deadline = drm_sched_fence_set_deadline_finished,
> > >   };
> > >   struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> > > @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> > >   }
> > >   EXPORT_SYMBOL(to_drm_sched_fence);
> > > +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> > > +				struct dma_fence *fence)
> > > +{
> > > +	s_fence->parent = dma_fence_get(fence);
> > > +	if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> > > +		     &s_fence->finished.flags))
> > Don't you need the spinlock here too to avoid races? test_bit is very
> > unordered, so guarantees nothing. Spinlock would need to be both around
> > ->parent = and the test_bit.
> > 
> > Entirely aside, but there's discussions going on to preallocate the hw
> > fence somehow. If we do that we could make the deadline forwarding
> > lockless here. Having a spinlock just to set the parent is a bit annoying
> 
> Well previously we didn't needed the spinlock to set the parent because the
> parent wasn't used outside of the main thread.
> 
> This becomes an issue now because we can race with setting the deadline. So
> yeah we probably do need the spinlock here now.

Yeah tbh this is the part I don't like, because it means the scheduler
thread locking becomes more complex.

We might need to look at this again when we include stuff like boosting
priorities and things like that. Maybe it would be better if we have a
request queue which the scheduler thread could then pick up whenever it
gets around to the next scheduling decision?

I think for now just the spinlock in more places should be fine.
-Daniel


> Christian.
> 
> > ...
> > 
> > Alternative is that you do this locklessly with barriers and a _lot_ of
> > comments. Would be good to benchmark whether the overhead matters though
> > first.
> > -Daniel
> > 
> > > +		dma_fence_set_deadline(fence, s_fence->deadline);
> > > +}
> > > +
> > >   struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
> > >   					      void *owner)
> > >   {
> > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> > > index 595e47ff7d06..27bf0ac0625f 100644
> > > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > > @@ -978,7 +978,7 @@ static int drm_sched_main(void *param)
> > >   		drm_sched_fence_scheduled(s_fence);
> > >   		if (!IS_ERR_OR_NULL(fence)) {
> > > -			s_fence->parent = dma_fence_get(fence);
> > > +			drm_sched_fence_set_parent(s_fence, fence);
> > >   			r = dma_fence_add_callback(fence, &sched_job->cb,
> > >   						   drm_sched_job_done_cb);
> > >   			if (r == -ENOENT)
> > > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> > > index 7f77a455722c..158ddd662469 100644
> > > --- a/include/drm/gpu_scheduler.h
> > > +++ b/include/drm/gpu_scheduler.h
> > > @@ -238,6 +238,12 @@ struct drm_sched_fence {
> > >            */
> > >   	struct dma_fence		finished;
> > > +	/**
> > > +	 * @deadline: deadline set on &drm_sched_fence.finished which
> > > +	 * potentially needs to be propagated to &drm_sched_fence.parent
> > > +	 */
> > > +	ktime_t				deadline;
> > > +
> > >           /**
> > >            * @parent: the fence returned by &drm_sched_backend_ops.run_job
> > >            * when scheduling the job on hardware. We signal the
> > > @@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
> > >   				   enum drm_sched_priority priority);
> > >   bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
> > > +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> > > +				struct dma_fence *fence);
> > >   struct drm_sched_fence *drm_sched_fence_alloc(
> > >   	struct drm_sched_entity *s_entity, void *owner);
> > >   void drm_sched_fence_init(struct drm_sched_fence *fence,
> > > -- 
> > > 2.31.1
> > > 
>
Rob Clark Sept. 21, 2021, 3:57 p.m. UTC | #4
On Wed, Sep 8, 2021 at 10:45 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Fri, Sep 03, 2021 at 11:47:55AM -0700, Rob Clark wrote:
> > From: Rob Clark <robdclark@chromium.org>
> >
> > As the finished fence is the one that is exposed to userspace, and
> > therefore the one that other operations, like atomic update, would
> > block on, we need to propagate the deadline from from the finished
> > fence to the actual hw fence.
> >
> > v2: Split into drm_sched_fence_set_parent() (ckoenig)
> >
> > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > ---
> >  drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++
> >  drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
> >  include/drm/gpu_scheduler.h             |  8 ++++++
> >  3 files changed, 43 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> > index bcea035cf4c6..4fc41a71d1c7 100644
> > --- a/drivers/gpu/drm/scheduler/sched_fence.c
> > +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> > @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
> >       dma_fence_put(&fence->scheduled);
> >  }
> >
> > +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
> > +                                               ktime_t deadline)
> > +{
> > +     struct drm_sched_fence *fence = to_drm_sched_fence(f);
> > +     unsigned long flags;
> > +
> > +     spin_lock_irqsave(&fence->lock, flags);
> > +
> > +     /* If we already have an earlier deadline, keep it: */
> > +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
> > +         ktime_before(fence->deadline, deadline)) {
> > +             spin_unlock_irqrestore(&fence->lock, flags);
> > +             return;
> > +     }
> > +
> > +     fence->deadline = deadline;
> > +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
> > +
> > +     spin_unlock_irqrestore(&fence->lock, flags);
> > +
> > +     if (fence->parent)
> > +             dma_fence_set_deadline(fence->parent, deadline);
> > +}
> > +
> >  static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
> >       .get_driver_name = drm_sched_fence_get_driver_name,
> >       .get_timeline_name = drm_sched_fence_get_timeline_name,
> > @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
> >       .get_driver_name = drm_sched_fence_get_driver_name,
> >       .get_timeline_name = drm_sched_fence_get_timeline_name,
> >       .release = drm_sched_fence_release_finished,
> > +     .set_deadline = drm_sched_fence_set_deadline_finished,
> >  };
> >
> >  struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> > @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> >  }
> >  EXPORT_SYMBOL(to_drm_sched_fence);
> >
> > +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> > +                             struct dma_fence *fence)
> > +{
> > +     s_fence->parent = dma_fence_get(fence);
> > +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> > +                  &s_fence->finished.flags))
>
> Don't you need the spinlock here too to avoid races? test_bit is very
> unordered, so guarantees nothing. Spinlock would need to be both around
> ->parent = and the test_bit.
>
> Entirely aside, but there's discussions going on to preallocate the hw
> fence somehow. If we do that we could make the deadline forwarding
> lockless here. Having a spinlock just to set the parent is a bit annoying
> ...
>
> Alternative is that you do this locklessly with barriers and a _lot_ of
> comments. Would be good to benchmark whether the overhead matters though
> first.

So, my thinking is that very few (well no) guarantees are made to the
fence implementor that their ->set_deadline() is not called multiple
times, from multiple threads, etc.  And no guarantee that a later
deadline is set after an earlier deadline has been set.  It is all up
to the set_deadline() implementation to deal with these cases.

So that means we just need the appropriate barrier-fu to ensure
another thread calling drm_sched_fence_set_deadline_finished() sees
fence->parent set before the test_bit.  It could mean that the backend
implementation sees the same deadline set twice, but that is fine.

BR,
-R

> -Daniel
>
> > +             dma_fence_set_deadline(fence, s_fence->deadline);
> > +}
> > +
> >  struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
> >                                             void *owner)
> >  {
> > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> > index 595e47ff7d06..27bf0ac0625f 100644
> > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > @@ -978,7 +978,7 @@ static int drm_sched_main(void *param)
> >               drm_sched_fence_scheduled(s_fence);
> >
> >               if (!IS_ERR_OR_NULL(fence)) {
> > -                     s_fence->parent = dma_fence_get(fence);
> > +                     drm_sched_fence_set_parent(s_fence, fence);
> >                       r = dma_fence_add_callback(fence, &sched_job->cb,
> >                                                  drm_sched_job_done_cb);
> >                       if (r == -ENOENT)
> > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> > index 7f77a455722c..158ddd662469 100644
> > --- a/include/drm/gpu_scheduler.h
> > +++ b/include/drm/gpu_scheduler.h
> > @@ -238,6 +238,12 @@ struct drm_sched_fence {
> >           */
> >       struct dma_fence                finished;
> >
> > +     /**
> > +      * @deadline: deadline set on &drm_sched_fence.finished which
> > +      * potentially needs to be propagated to &drm_sched_fence.parent
> > +      */
> > +     ktime_t                         deadline;
> > +
> >          /**
> >           * @parent: the fence returned by &drm_sched_backend_ops.run_job
> >           * when scheduling the job on hardware. We signal the
> > @@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
> >                                  enum drm_sched_priority priority);
> >  bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
> >
> > +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> > +                             struct dma_fence *fence);
> >  struct drm_sched_fence *drm_sched_fence_alloc(
> >       struct drm_sched_entity *s_entity, void *owner);
> >  void drm_sched_fence_init(struct drm_sched_fence *fence,
> > --
> > 2.31.1
> >
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Rob Clark Sept. 21, 2021, 4:35 p.m. UTC | #5
On Tue, Sep 21, 2021 at 8:57 AM Rob Clark <robdclark@gmail.com> wrote:
>
> On Wed, Sep 8, 2021 at 10:45 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> >
> > On Fri, Sep 03, 2021 at 11:47:55AM -0700, Rob Clark wrote:
> > > From: Rob Clark <robdclark@chromium.org>
> > >
> > > As the finished fence is the one that is exposed to userspace, and
> > > therefore the one that other operations, like atomic update, would
> > > block on, we need to propagate the deadline from from the finished
> > > fence to the actual hw fence.
> > >
> > > v2: Split into drm_sched_fence_set_parent() (ckoenig)
> > >
> > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > ---
> > >  drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++
> > >  drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
> > >  include/drm/gpu_scheduler.h             |  8 ++++++
> > >  3 files changed, 43 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> > > index bcea035cf4c6..4fc41a71d1c7 100644
> > > --- a/drivers/gpu/drm/scheduler/sched_fence.c
> > > +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> > > @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
> > >       dma_fence_put(&fence->scheduled);
> > >  }
> > >
> > > +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
> > > +                                               ktime_t deadline)
> > > +{
> > > +     struct drm_sched_fence *fence = to_drm_sched_fence(f);
> > > +     unsigned long flags;
> > > +
> > > +     spin_lock_irqsave(&fence->lock, flags);
> > > +
> > > +     /* If we already have an earlier deadline, keep it: */
> > > +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
> > > +         ktime_before(fence->deadline, deadline)) {
> > > +             spin_unlock_irqrestore(&fence->lock, flags);
> > > +             return;
> > > +     }
> > > +
> > > +     fence->deadline = deadline;
> > > +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
> > > +
> > > +     spin_unlock_irqrestore(&fence->lock, flags);
> > > +
> > > +     if (fence->parent)
> > > +             dma_fence_set_deadline(fence->parent, deadline);
> > > +}
> > > +
> > >  static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
> > >       .get_driver_name = drm_sched_fence_get_driver_name,
> > >       .get_timeline_name = drm_sched_fence_get_timeline_name,
> > > @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
> > >       .get_driver_name = drm_sched_fence_get_driver_name,
> > >       .get_timeline_name = drm_sched_fence_get_timeline_name,
> > >       .release = drm_sched_fence_release_finished,
> > > +     .set_deadline = drm_sched_fence_set_deadline_finished,
> > >  };
> > >
> > >  struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> > > @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> > >  }
> > >  EXPORT_SYMBOL(to_drm_sched_fence);
> > >
> > > +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> > > +                             struct dma_fence *fence)
> > > +{
> > > +     s_fence->parent = dma_fence_get(fence);
> > > +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> > > +                  &s_fence->finished.flags))
> >
> > Don't you need the spinlock here too to avoid races? test_bit is very
> > unordered, so guarantees nothing. Spinlock would need to be both around
> > ->parent = and the test_bit.
> >
> > Entirely aside, but there's discussions going on to preallocate the hw
> > fence somehow. If we do that we could make the deadline forwarding
> > lockless here. Having a spinlock just to set the parent is a bit annoying
> > ...
> >
> > Alternative is that you do this locklessly with barriers and a _lot_ of
> > comments. Would be good to benchmark whether the overhead matters though
> > first.
>
> So, my thinking is that very few (well no) guarantees are made to the
> fence implementor that their ->set_deadline() is not called multiple
> times, from multiple threads, etc.  And no guarantee that a later
> deadline is set after an earlier deadline has been set.  It is all up
> to the set_deadline() implementation to deal with these cases.
>
> So that means we just need the appropriate barrier-fu to ensure
> another thread calling drm_sched_fence_set_deadline_finished() sees
> fence->parent set before the test_bit.  It could mean that the backend
> implementation sees the same deadline set twice, but that is fine.
>

something like:

-----
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c
b/drivers/gpu/drm/scheduler/sched_fence.c
index 4fc41a71d1c7..7f2af6d1777c 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -132,6 +132,7 @@ static void
drm_sched_fence_set_deadline_finished(struct dma_fence *f,
    ktime_t deadline)
 {
  struct drm_sched_fence *fence = to_drm_sched_fence(f);
+ struct dma_fence *parent;
  unsigned long flags;

  spin_lock_irqsave(&fence->lock, flags);
@@ -148,8 +149,9 @@ static void
drm_sched_fence_set_deadline_finished(struct dma_fence *f,

  spin_unlock_irqrestore(&fence->lock, flags);

- if (fence->parent)
- dma_fence_set_deadline(fence->parent, deadline);
+ parent = smp_load_acquire(&fence->parent);
+ if (parent)
+ dma_fence_set_deadline(parent, deadline);
 }

 static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
@@ -180,7 +182,7 @@ EXPORT_SYMBOL(to_drm_sched_fence);
 void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
  struct dma_fence *fence)
 {
- s_fence->parent = dma_fence_get(fence);
+ smp_store_release(&s_fence->parent, dma_fence_get(fence));
  if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
       &s_fence->finished.flags))
  dma_fence_set_deadline(fence, s_fence->deadline);
-----

BR,
-R
Christian König Sept. 21, 2021, 4:45 p.m. UTC | #6
Am 21.09.21 um 18:35 schrieb Rob Clark:
> On Tue, Sep 21, 2021 at 8:57 AM Rob Clark <robdclark@gmail.com> wrote:
>> On Wed, Sep 8, 2021 at 10:45 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>>> On Fri, Sep 03, 2021 at 11:47:55AM -0700, Rob Clark wrote:
>>>> From: Rob Clark <robdclark@chromium.org>
>>>>
>>>> As the finished fence is the one that is exposed to userspace, and
>>>> therefore the one that other operations, like atomic update, would
>>>> block on, we need to propagate the deadline from from the finished
>>>> fence to the actual hw fence.
>>>>
>>>> v2: Split into drm_sched_fence_set_parent() (ckoenig)
>>>>
>>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
>>>> ---
>>>>   drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++
>>>>   drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
>>>>   include/drm/gpu_scheduler.h             |  8 ++++++
>>>>   3 files changed, 43 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
>>>> index bcea035cf4c6..4fc41a71d1c7 100644
>>>> --- a/drivers/gpu/drm/scheduler/sched_fence.c
>>>> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
>>>> @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
>>>>        dma_fence_put(&fence->scheduled);
>>>>   }
>>>>
>>>> +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
>>>> +                                               ktime_t deadline)
>>>> +{
>>>> +     struct drm_sched_fence *fence = to_drm_sched_fence(f);
>>>> +     unsigned long flags;
>>>> +
>>>> +     spin_lock_irqsave(&fence->lock, flags);
>>>> +
>>>> +     /* If we already have an earlier deadline, keep it: */
>>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
>>>> +         ktime_before(fence->deadline, deadline)) {
>>>> +             spin_unlock_irqrestore(&fence->lock, flags);
>>>> +             return;
>>>> +     }
>>>> +
>>>> +     fence->deadline = deadline;
>>>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
>>>> +
>>>> +     spin_unlock_irqrestore(&fence->lock, flags);
>>>> +
>>>> +     if (fence->parent)
>>>> +             dma_fence_set_deadline(fence->parent, deadline);
>>>> +}
>>>> +
>>>>   static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
>>>>        .get_driver_name = drm_sched_fence_get_driver_name,
>>>>        .get_timeline_name = drm_sched_fence_get_timeline_name,
>>>> @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
>>>>        .get_driver_name = drm_sched_fence_get_driver_name,
>>>>        .get_timeline_name = drm_sched_fence_get_timeline_name,
>>>>        .release = drm_sched_fence_release_finished,
>>>> +     .set_deadline = drm_sched_fence_set_deadline_finished,
>>>>   };
>>>>
>>>>   struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
>>>> @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
>>>>   }
>>>>   EXPORT_SYMBOL(to_drm_sched_fence);
>>>>
>>>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
>>>> +                             struct dma_fence *fence)
>>>> +{
>>>> +     s_fence->parent = dma_fence_get(fence);
>>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
>>>> +                  &s_fence->finished.flags))
>>> Don't you need the spinlock here too to avoid races? test_bit is very
>>> unordered, so guarantees nothing. Spinlock would need to be both around
>>> ->parent = and the test_bit.
>>>
>>> Entirely aside, but there's discussions going on to preallocate the hw
>>> fence somehow. If we do that we could make the deadline forwarding
>>> lockless here. Having a spinlock just to set the parent is a bit annoying
>>> ...
>>>
>>> Alternative is that you do this locklessly with barriers and a _lot_ of
>>> comments. Would be good to benchmark whether the overhead matters though
>>> first.
>> So, my thinking is that very few (well no) guarantees are made to the
>> fence implementor that their ->set_deadline() is not called multiple
>> times, from multiple threads, etc.  And no guarantee that a later
>> deadline is set after an earlier deadline has been set.  It is all up
>> to the set_deadline() implementation to deal with these cases.
>>
>> So that means we just need the appropriate barrier-fu to ensure
>> another thread calling drm_sched_fence_set_deadline_finished() sees
>> fence->parent set before the test_bit.  It could mean that the backend
>> implementation sees the same deadline set twice, but that is fine.
>>
> something like:

Of hand I think that this should work.

Or rather say I can't see anything wrong with that.

Christian.

>
> -----
> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c
> b/drivers/gpu/drm/scheduler/sched_fence.c
> index 4fc41a71d1c7..7f2af6d1777c 100644
> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> @@ -132,6 +132,7 @@ static void
> drm_sched_fence_set_deadline_finished(struct dma_fence *f,
>      ktime_t deadline)
>   {
>    struct drm_sched_fence *fence = to_drm_sched_fence(f);
> + struct dma_fence *parent;
>    unsigned long flags;
>
>    spin_lock_irqsave(&fence->lock, flags);
> @@ -148,8 +149,9 @@ static void
> drm_sched_fence_set_deadline_finished(struct dma_fence *f,
>
>    spin_unlock_irqrestore(&fence->lock, flags);
>
> - if (fence->parent)
> - dma_fence_set_deadline(fence->parent, deadline);
> + parent = smp_load_acquire(&fence->parent);
> + if (parent)
> + dma_fence_set_deadline(parent, deadline);
>   }
>
>   static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
> @@ -180,7 +182,7 @@ EXPORT_SYMBOL(to_drm_sched_fence);
>   void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
>    struct dma_fence *fence)
>   {
> - s_fence->parent = dma_fence_get(fence);
> + smp_store_release(&s_fence->parent, dma_fence_get(fence));
>    if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
>         &s_fence->finished.flags))
>    dma_fence_set_deadline(fence, s_fence->deadline);
> -----
>
> BR,
> -R
Andrey Grodzovsky Sept. 21, 2021, 8:09 p.m. UTC | #7
On 2021-09-03 2:47 p.m., Rob Clark wrote:

> From: Rob Clark <robdclark@chromium.org>
>
> As the finished fence is the one that is exposed to userspace, and
> therefore the one that other operations, like atomic update, would
> block on, we need to propagate the deadline from from the finished
> fence to the actual hw fence.
>
> v2: Split into drm_sched_fence_set_parent() (ckoenig)
>
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
>   drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++
>   drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
>   include/drm/gpu_scheduler.h             |  8 ++++++
>   3 files changed, 43 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> index bcea035cf4c6..4fc41a71d1c7 100644
> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
>   	dma_fence_put(&fence->scheduled);
>   }
>   
> +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
> +						  ktime_t deadline)
> +{
> +	struct drm_sched_fence *fence = to_drm_sched_fence(f);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&fence->lock, flags);
> +
> +	/* If we already have an earlier deadline, keep it: */
> +	if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
> +	    ktime_before(fence->deadline, deadline)) {
> +		spin_unlock_irqrestore(&fence->lock, flags);
> +		return;
> +	}
> +
> +	fence->deadline = deadline;
> +	set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
> +
> +	spin_unlock_irqrestore(&fence->lock, flags);
> +
> +	if (fence->parent)
> +		dma_fence_set_deadline(fence->parent, deadline);
> +}
> +
>   static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
>   	.get_driver_name = drm_sched_fence_get_driver_name,
>   	.get_timeline_name = drm_sched_fence_get_timeline_name,
> @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
>   	.get_driver_name = drm_sched_fence_get_driver_name,
>   	.get_timeline_name = drm_sched_fence_get_timeline_name,
>   	.release = drm_sched_fence_release_finished,
> +	.set_deadline = drm_sched_fence_set_deadline_finished,
>   };
>   
>   struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
>   }
>   EXPORT_SYMBOL(to_drm_sched_fence);
>   
> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> +				struct dma_fence *fence)
> +{
> +	s_fence->parent = dma_fence_get(fence);
> +	if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> +		     &s_fence->finished.flags))
> +		dma_fence_set_deadline(fence, s_fence->deadline);


I believe above you should pass be s_fence->finished to 
dma_fence_set_deadline
instead it fence which is the HW fence itself.

Andrey


> +}
> +
>   struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
>   					      void *owner)
>   {
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 595e47ff7d06..27bf0ac0625f 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -978,7 +978,7 @@ static int drm_sched_main(void *param)
>   		drm_sched_fence_scheduled(s_fence);
>   
>   		if (!IS_ERR_OR_NULL(fence)) {
> -			s_fence->parent = dma_fence_get(fence);
> +			drm_sched_fence_set_parent(s_fence, fence);
>   			r = dma_fence_add_callback(fence, &sched_job->cb,
>   						   drm_sched_job_done_cb);
>   			if (r == -ENOENT)
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 7f77a455722c..158ddd662469 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -238,6 +238,12 @@ struct drm_sched_fence {
>            */
>   	struct dma_fence		finished;
>   
> +	/**
> +	 * @deadline: deadline set on &drm_sched_fence.finished which
> +	 * potentially needs to be propagated to &drm_sched_fence.parent
> +	 */
> +	ktime_t				deadline;
> +
>           /**
>            * @parent: the fence returned by &drm_sched_backend_ops.run_job
>            * when scheduling the job on hardware. We signal the
> @@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
>   				   enum drm_sched_priority priority);
>   bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>   
> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> +				struct dma_fence *fence);
>   struct drm_sched_fence *drm_sched_fence_alloc(
>   	struct drm_sched_entity *s_entity, void *owner);
>   void drm_sched_fence_init(struct drm_sched_fence *fence,
Rob Clark Sept. 21, 2021, 8:47 p.m. UTC | #8
On Tue, Sep 21, 2021 at 1:09 PM Andrey Grodzovsky
<andrey.grodzovsky@amd.com> wrote:
>
> On 2021-09-03 2:47 p.m., Rob Clark wrote:
>
> > From: Rob Clark <robdclark@chromium.org>
> >
> > As the finished fence is the one that is exposed to userspace, and
> > therefore the one that other operations, like atomic update, would
> > block on, we need to propagate the deadline from from the finished
> > fence to the actual hw fence.
> >
> > v2: Split into drm_sched_fence_set_parent() (ckoenig)
> >
> > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > ---
> >   drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++
> >   drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
> >   include/drm/gpu_scheduler.h             |  8 ++++++
> >   3 files changed, 43 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> > index bcea035cf4c6..4fc41a71d1c7 100644
> > --- a/drivers/gpu/drm/scheduler/sched_fence.c
> > +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> > @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
> >       dma_fence_put(&fence->scheduled);
> >   }
> >
> > +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
> > +                                               ktime_t deadline)
> > +{
> > +     struct drm_sched_fence *fence = to_drm_sched_fence(f);
> > +     unsigned long flags;
> > +
> > +     spin_lock_irqsave(&fence->lock, flags);
> > +
> > +     /* If we already have an earlier deadline, keep it: */
> > +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
> > +         ktime_before(fence->deadline, deadline)) {
> > +             spin_unlock_irqrestore(&fence->lock, flags);
> > +             return;
> > +     }
> > +
> > +     fence->deadline = deadline;
> > +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
> > +
> > +     spin_unlock_irqrestore(&fence->lock, flags);
> > +
> > +     if (fence->parent)
> > +             dma_fence_set_deadline(fence->parent, deadline);
> > +}
> > +
> >   static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
> >       .get_driver_name = drm_sched_fence_get_driver_name,
> >       .get_timeline_name = drm_sched_fence_get_timeline_name,
> > @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
> >       .get_driver_name = drm_sched_fence_get_driver_name,
> >       .get_timeline_name = drm_sched_fence_get_timeline_name,
> >       .release = drm_sched_fence_release_finished,
> > +     .set_deadline = drm_sched_fence_set_deadline_finished,
> >   };
> >
> >   struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> > @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> >   }
> >   EXPORT_SYMBOL(to_drm_sched_fence);
> >
> > +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> > +                             struct dma_fence *fence)
> > +{
> > +     s_fence->parent = dma_fence_get(fence);
> > +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> > +                  &s_fence->finished.flags))
> > +             dma_fence_set_deadline(fence, s_fence->deadline);
>
>
> I believe above you should pass be s_fence->finished to
> dma_fence_set_deadline
> instead it fence which is the HW fence itself.

Hmm, unless this has changed recently with some patches I don't have,
s_fence->parent is the one signalled by hw, so it is the one we want
to set the deadline on

BR,
-R

> Andrey
>
>
> > +}
> > +
> >   struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
> >                                             void *owner)
> >   {
> > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> > index 595e47ff7d06..27bf0ac0625f 100644
> > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > @@ -978,7 +978,7 @@ static int drm_sched_main(void *param)
> >               drm_sched_fence_scheduled(s_fence);
> >
> >               if (!IS_ERR_OR_NULL(fence)) {
> > -                     s_fence->parent = dma_fence_get(fence);
> > +                     drm_sched_fence_set_parent(s_fence, fence);
> >                       r = dma_fence_add_callback(fence, &sched_job->cb,
> >                                                  drm_sched_job_done_cb);
> >                       if (r == -ENOENT)
> > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> > index 7f77a455722c..158ddd662469 100644
> > --- a/include/drm/gpu_scheduler.h
> > +++ b/include/drm/gpu_scheduler.h
> > @@ -238,6 +238,12 @@ struct drm_sched_fence {
> >            */
> >       struct dma_fence                finished;
> >
> > +     /**
> > +      * @deadline: deadline set on &drm_sched_fence.finished which
> > +      * potentially needs to be propagated to &drm_sched_fence.parent
> > +      */
> > +     ktime_t                         deadline;
> > +
> >           /**
> >            * @parent: the fence returned by &drm_sched_backend_ops.run_job
> >            * when scheduling the job on hardware. We signal the
> > @@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
> >                                  enum drm_sched_priority priority);
> >   bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
> >
> > +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> > +                             struct dma_fence *fence);
> >   struct drm_sched_fence *drm_sched_fence_alloc(
> >       struct drm_sched_entity *s_entity, void *owner);
> >   void drm_sched_fence_init(struct drm_sched_fence *fence,
Andrey Grodzovsky Sept. 22, 2021, 2:18 a.m. UTC | #9
On 2021-09-21 4:47 p.m., Rob Clark wrote:
> On Tue, Sep 21, 2021 at 1:09 PM Andrey Grodzovsky
> <andrey.grodzovsky@amd.com> wrote:
>> On 2021-09-03 2:47 p.m., Rob Clark wrote:
>>
>>> From: Rob Clark <robdclark@chromium.org>
>>>
>>> As the finished fence is the one that is exposed to userspace, and
>>> therefore the one that other operations, like atomic update, would
>>> block on, we need to propagate the deadline from from the finished
>>> fence to the actual hw fence.
>>>
>>> v2: Split into drm_sched_fence_set_parent() (ckoenig)
>>>
>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
>>> ---
>>>    drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++
>>>    drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
>>>    include/drm/gpu_scheduler.h             |  8 ++++++
>>>    3 files changed, 43 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
>>> index bcea035cf4c6..4fc41a71d1c7 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_fence.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
>>> @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
>>>        dma_fence_put(&fence->scheduled);
>>>    }
>>>
>>> +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
>>> +                                               ktime_t deadline)
>>> +{
>>> +     struct drm_sched_fence *fence = to_drm_sched_fence(f);
>>> +     unsigned long flags;
>>> +
>>> +     spin_lock_irqsave(&fence->lock, flags);
>>> +
>>> +     /* If we already have an earlier deadline, keep it: */
>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
>>> +         ktime_before(fence->deadline, deadline)) {
>>> +             spin_unlock_irqrestore(&fence->lock, flags);
>>> +             return;
>>> +     }
>>> +
>>> +     fence->deadline = deadline;
>>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
>>> +
>>> +     spin_unlock_irqrestore(&fence->lock, flags);
>>> +
>>> +     if (fence->parent)
>>> +             dma_fence_set_deadline(fence->parent, deadline);
>>> +}
>>> +
>>>    static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
>>>        .get_driver_name = drm_sched_fence_get_driver_name,
>>>        .get_timeline_name = drm_sched_fence_get_timeline_name,
>>> @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
>>>        .get_driver_name = drm_sched_fence_get_driver_name,
>>>        .get_timeline_name = drm_sched_fence_get_timeline_name,
>>>        .release = drm_sched_fence_release_finished,
>>> +     .set_deadline = drm_sched_fence_set_deadline_finished,
>>>    };
>>>
>>>    struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
>>> @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
>>>    }
>>>    EXPORT_SYMBOL(to_drm_sched_fence);
>>>
>>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
>>> +                             struct dma_fence *fence)
>>> +{
>>> +     s_fence->parent = dma_fence_get(fence);
>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
>>> +                  &s_fence->finished.flags))
>>> +             dma_fence_set_deadline(fence, s_fence->deadline);
>>
>> I believe above you should pass be s_fence->finished to
>> dma_fence_set_deadline
>> instead it fence which is the HW fence itself.
> Hmm, unless this has changed recently with some patches I don't have,
> s_fence->parent is the one signalled by hw, so it is the one we want
> to set the deadline on
>
> BR,
> -R


No it didn't change. But then when exactly will 
drm_sched_fence_set_deadline_finished
execute such that fence->parent != NULL ? In other words, I am not clear 
how propagation
happens otherwise - if dma_fence_set_deadline is called with the HW 
fence then the assumption
here is that driver provided driver specific 
dma_fence_ops.dma_fence_set_deadline callback executes
but I was under impression that drm_sched_fence_set_deadline_finished is 
the one that propagates
the deadline to the HW fence's callback and for it to execute 
dma_fence_set_deadline needs to be called
with s_fence->finished.

Andrey



>
>> Andrey
>>
>>
>>> +}
>>> +
>>>    struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
>>>                                              void *owner)
>>>    {
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>> index 595e47ff7d06..27bf0ac0625f 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -978,7 +978,7 @@ static int drm_sched_main(void *param)
>>>                drm_sched_fence_scheduled(s_fence);
>>>
>>>                if (!IS_ERR_OR_NULL(fence)) {
>>> -                     s_fence->parent = dma_fence_get(fence);
>>> +                     drm_sched_fence_set_parent(s_fence, fence);
>>>                        r = dma_fence_add_callback(fence, &sched_job->cb,
>>>                                                   drm_sched_job_done_cb);
>>>                        if (r == -ENOENT)
>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>> index 7f77a455722c..158ddd662469 100644
>>> --- a/include/drm/gpu_scheduler.h
>>> +++ b/include/drm/gpu_scheduler.h
>>> @@ -238,6 +238,12 @@ struct drm_sched_fence {
>>>             */
>>>        struct dma_fence                finished;
>>>
>>> +     /**
>>> +      * @deadline: deadline set on &drm_sched_fence.finished which
>>> +      * potentially needs to be propagated to &drm_sched_fence.parent
>>> +      */
>>> +     ktime_t                         deadline;
>>> +
>>>            /**
>>>             * @parent: the fence returned by &drm_sched_backend_ops.run_job
>>>             * when scheduling the job on hardware. We signal the
>>> @@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
>>>                                   enum drm_sched_priority priority);
>>>    bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>>>
>>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
>>> +                             struct dma_fence *fence);
>>>    struct drm_sched_fence *drm_sched_fence_alloc(
>>>        struct drm_sched_entity *s_entity, void *owner);
>>>    void drm_sched_fence_init(struct drm_sched_fence *fence,
Rob Clark Sept. 22, 2021, 3:32 a.m. UTC | #10
On Tue, Sep 21, 2021 at 7:18 PM Andrey Grodzovsky
<andrey.grodzovsky@amd.com> wrote:
>
>
> On 2021-09-21 4:47 p.m., Rob Clark wrote:
> > On Tue, Sep 21, 2021 at 1:09 PM Andrey Grodzovsky
> > <andrey.grodzovsky@amd.com> wrote:
> >> On 2021-09-03 2:47 p.m., Rob Clark wrote:
> >>
> >>> From: Rob Clark <robdclark@chromium.org>
> >>>
> >>> As the finished fence is the one that is exposed to userspace, and
> >>> therefore the one that other operations, like atomic update, would
> >>> block on, we need to propagate the deadline from from the finished
> >>> fence to the actual hw fence.
> >>>
> >>> v2: Split into drm_sched_fence_set_parent() (ckoenig)
> >>>
> >>> Signed-off-by: Rob Clark <robdclark@chromium.org>
> >>> ---
> >>>    drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++
> >>>    drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
> >>>    include/drm/gpu_scheduler.h             |  8 ++++++
> >>>    3 files changed, 43 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> >>> index bcea035cf4c6..4fc41a71d1c7 100644
> >>> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> >>> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> >>> @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
> >>>        dma_fence_put(&fence->scheduled);
> >>>    }
> >>>
> >>> +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
> >>> +                                               ktime_t deadline)
> >>> +{
> >>> +     struct drm_sched_fence *fence = to_drm_sched_fence(f);
> >>> +     unsigned long flags;
> >>> +
> >>> +     spin_lock_irqsave(&fence->lock, flags);
> >>> +
> >>> +     /* If we already have an earlier deadline, keep it: */
> >>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
> >>> +         ktime_before(fence->deadline, deadline)) {
> >>> +             spin_unlock_irqrestore(&fence->lock, flags);
> >>> +             return;
> >>> +     }
> >>> +
> >>> +     fence->deadline = deadline;
> >>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
> >>> +
> >>> +     spin_unlock_irqrestore(&fence->lock, flags);
> >>> +
> >>> +     if (fence->parent)
> >>> +             dma_fence_set_deadline(fence->parent, deadline);
> >>> +}
> >>> +
> >>>    static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
> >>>        .get_driver_name = drm_sched_fence_get_driver_name,
> >>>        .get_timeline_name = drm_sched_fence_get_timeline_name,
> >>> @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
> >>>        .get_driver_name = drm_sched_fence_get_driver_name,
> >>>        .get_timeline_name = drm_sched_fence_get_timeline_name,
> >>>        .release = drm_sched_fence_release_finished,
> >>> +     .set_deadline = drm_sched_fence_set_deadline_finished,
> >>>    };
> >>>
> >>>    struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> >>> @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> >>>    }
> >>>    EXPORT_SYMBOL(to_drm_sched_fence);
> >>>
> >>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> >>> +                             struct dma_fence *fence)
> >>> +{
> >>> +     s_fence->parent = dma_fence_get(fence);
> >>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> >>> +                  &s_fence->finished.flags))
> >>> +             dma_fence_set_deadline(fence, s_fence->deadline);
> >>
> >> I believe above you should pass be s_fence->finished to
> >> dma_fence_set_deadline
> >> instead it fence which is the HW fence itself.
> > Hmm, unless this has changed recently with some patches I don't have,
> > s_fence->parent is the one signalled by hw, so it is the one we want
> > to set the deadline on
> >
> > BR,
> > -R
>
>
> No it didn't change. But then when exactly will
> drm_sched_fence_set_deadline_finished
> execute such that fence->parent != NULL ? In other words, I am not clear
> how propagation
> happens otherwise - if dma_fence_set_deadline is called with the HW
> fence then the assumption
> here is that driver provided driver specific
> dma_fence_ops.dma_fence_set_deadline callback executes
> but I was under impression that drm_sched_fence_set_deadline_finished is
> the one that propagates
> the deadline to the HW fence's callback and for it to execute
> dma_fence_set_deadline needs to be called
> with s_fence->finished.

Assuming I didn't screw up drm/msm conversion to scheduler,
&s_fence->finished is the one that will be returned to userspace.. and
later passed back to kernel for atomic commit (or to the compositor).
So it is the one that fence->set_deadline() will be called on.  But
s_fence->parent is the actual hw fence that needs to know about the
deadline.  Depending on whether or not the job has been written into
hw ringbuffer or not, there are two cases:

1) not scheduled yet, s_fence will store the deadline and propagate it
later once s_fence->parent is known
2) already scheduled, in which case s_fence->finished.set_deadline
will propagate it directly to the real fence

BR,
-R

> Andrey
>
>
>
> >
> >> Andrey
> >>
> >>
> >>> +}
> >>> +
> >>>    struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
> >>>                                              void *owner)
> >>>    {
> >>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> >>> index 595e47ff7d06..27bf0ac0625f 100644
> >>> --- a/drivers/gpu/drm/scheduler/sched_main.c
> >>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> >>> @@ -978,7 +978,7 @@ static int drm_sched_main(void *param)
> >>>                drm_sched_fence_scheduled(s_fence);
> >>>
> >>>                if (!IS_ERR_OR_NULL(fence)) {
> >>> -                     s_fence->parent = dma_fence_get(fence);
> >>> +                     drm_sched_fence_set_parent(s_fence, fence);
> >>>                        r = dma_fence_add_callback(fence, &sched_job->cb,
> >>>                                                   drm_sched_job_done_cb);
> >>>                        if (r == -ENOENT)
> >>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> >>> index 7f77a455722c..158ddd662469 100644
> >>> --- a/include/drm/gpu_scheduler.h
> >>> +++ b/include/drm/gpu_scheduler.h
> >>> @@ -238,6 +238,12 @@ struct drm_sched_fence {
> >>>             */
> >>>        struct dma_fence                finished;
> >>>
> >>> +     /**
> >>> +      * @deadline: deadline set on &drm_sched_fence.finished which
> >>> +      * potentially needs to be propagated to &drm_sched_fence.parent
> >>> +      */
> >>> +     ktime_t                         deadline;
> >>> +
> >>>            /**
> >>>             * @parent: the fence returned by &drm_sched_backend_ops.run_job
> >>>             * when scheduling the job on hardware. We signal the
> >>> @@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
> >>>                                   enum drm_sched_priority priority);
> >>>    bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
> >>>
> >>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> >>> +                             struct dma_fence *fence);
> >>>    struct drm_sched_fence *drm_sched_fence_alloc(
> >>>        struct drm_sched_entity *s_entity, void *owner);
> >>>    void drm_sched_fence_init(struct drm_sched_fence *fence,
Andrey Grodzovsky Sept. 22, 2021, 2:31 p.m. UTC | #11
On 2021-09-21 11:32 p.m., Rob Clark wrote:
> On Tue, Sep 21, 2021 at 7:18 PM Andrey Grodzovsky
> <andrey.grodzovsky@amd.com> wrote:
>>
>> On 2021-09-21 4:47 p.m., Rob Clark wrote:
>>> On Tue, Sep 21, 2021 at 1:09 PM Andrey Grodzovsky
>>> <andrey.grodzovsky@amd.com> wrote:
>>>> On 2021-09-03 2:47 p.m., Rob Clark wrote:
>>>>
>>>>> From: Rob Clark <robdclark@chromium.org>
>>>>>
>>>>> As the finished fence is the one that is exposed to userspace, and
>>>>> therefore the one that other operations, like atomic update, would
>>>>> block on, we need to propagate the deadline from from the finished
>>>>> fence to the actual hw fence.
>>>>>
>>>>> v2: Split into drm_sched_fence_set_parent() (ckoenig)
>>>>>
>>>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
>>>>> ---
>>>>>     drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++
>>>>>     drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
>>>>>     include/drm/gpu_scheduler.h             |  8 ++++++
>>>>>     3 files changed, 43 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
>>>>> index bcea035cf4c6..4fc41a71d1c7 100644
>>>>> --- a/drivers/gpu/drm/scheduler/sched_fence.c
>>>>> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
>>>>> @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
>>>>>         dma_fence_put(&fence->scheduled);
>>>>>     }
>>>>>
>>>>> +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
>>>>> +                                               ktime_t deadline)
>>>>> +{
>>>>> +     struct drm_sched_fence *fence = to_drm_sched_fence(f);
>>>>> +     unsigned long flags;
>>>>> +
>>>>> +     spin_lock_irqsave(&fence->lock, flags);
>>>>> +
>>>>> +     /* If we already have an earlier deadline, keep it: */
>>>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
>>>>> +         ktime_before(fence->deadline, deadline)) {
>>>>> +             spin_unlock_irqrestore(&fence->lock, flags);
>>>>> +             return;
>>>>> +     }
>>>>> +
>>>>> +     fence->deadline = deadline;
>>>>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
>>>>> +
>>>>> +     spin_unlock_irqrestore(&fence->lock, flags);
>>>>> +
>>>>> +     if (fence->parent)
>>>>> +             dma_fence_set_deadline(fence->parent, deadline);
>>>>> +}
>>>>> +
>>>>>     static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
>>>>>         .get_driver_name = drm_sched_fence_get_driver_name,
>>>>>         .get_timeline_name = drm_sched_fence_get_timeline_name,
>>>>> @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
>>>>>         .get_driver_name = drm_sched_fence_get_driver_name,
>>>>>         .get_timeline_name = drm_sched_fence_get_timeline_name,
>>>>>         .release = drm_sched_fence_release_finished,
>>>>> +     .set_deadline = drm_sched_fence_set_deadline_finished,
>>>>>     };
>>>>>
>>>>>     struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
>>>>> @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
>>>>>     }
>>>>>     EXPORT_SYMBOL(to_drm_sched_fence);
>>>>>
>>>>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
>>>>> +                             struct dma_fence *fence)
>>>>> +{
>>>>> +     s_fence->parent = dma_fence_get(fence);
>>>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
>>>>> +                  &s_fence->finished.flags))
>>>>> +             dma_fence_set_deadline(fence, s_fence->deadline);
>>>> I believe above you should pass be s_fence->finished to
>>>> dma_fence_set_deadline
>>>> instead it fence which is the HW fence itself.
>>> Hmm, unless this has changed recently with some patches I don't have,
>>> s_fence->parent is the one signalled by hw, so it is the one we want
>>> to set the deadline on
>>>
>>> BR,
>>> -R
>>
>> No it didn't change. But then when exactly will
>> drm_sched_fence_set_deadline_finished
>> execute such that fence->parent != NULL ? In other words, I am not clear
>> how propagation
>> happens otherwise - if dma_fence_set_deadline is called with the HW
>> fence then the assumption
>> here is that driver provided driver specific
>> dma_fence_ops.dma_fence_set_deadline callback executes
>> but I was under impression that drm_sched_fence_set_deadline_finished is
>> the one that propagates
>> the deadline to the HW fence's callback and for it to execute
>> dma_fence_set_deadline needs to be called
>> with s_fence->finished.
> Assuming I didn't screw up drm/msm conversion to scheduler,
> &s_fence->finished is the one that will be returned to userspace.. and
> later passed back to kernel for atomic commit (or to the compositor).
> So it is the one that fence->set_deadline() will be called on.  But
> s_fence->parent is the actual hw fence that needs to know about the
> deadline.  Depending on whether or not the job has been written into
> hw ringbuffer or not, there are two cases:
>
> 1) not scheduled yet, s_fence will store the deadline and propagate it
> later once s_fence->parent is known


And by later you mean the call to drm_sched_fence_set_parent
after HW fence is returned  ? If yes I think i get it now.

Andrey


> 2) already scheduled, in which case s_fence->finished.set_deadline
> will propagate it directly to the real fence
>
> BR,
> -R
>
>> Andrey
>>
>>
>>
>>>> Andrey
>>>>
>>>>
>>>>> +}
>>>>> +
>>>>>     struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
>>>>>                                               void *owner)
>>>>>     {
>>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>>>> index 595e47ff7d06..27bf0ac0625f 100644
>>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>>> @@ -978,7 +978,7 @@ static int drm_sched_main(void *param)
>>>>>                 drm_sched_fence_scheduled(s_fence);
>>>>>
>>>>>                 if (!IS_ERR_OR_NULL(fence)) {
>>>>> -                     s_fence->parent = dma_fence_get(fence);
>>>>> +                     drm_sched_fence_set_parent(s_fence, fence);
>>>>>                         r = dma_fence_add_callback(fence, &sched_job->cb,
>>>>>                                                    drm_sched_job_done_cb);
>>>>>                         if (r == -ENOENT)
>>>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>>>> index 7f77a455722c..158ddd662469 100644
>>>>> --- a/include/drm/gpu_scheduler.h
>>>>> +++ b/include/drm/gpu_scheduler.h
>>>>> @@ -238,6 +238,12 @@ struct drm_sched_fence {
>>>>>              */
>>>>>         struct dma_fence                finished;
>>>>>
>>>>> +     /**
>>>>> +      * @deadline: deadline set on &drm_sched_fence.finished which
>>>>> +      * potentially needs to be propagated to &drm_sched_fence.parent
>>>>> +      */
>>>>> +     ktime_t                         deadline;
>>>>> +
>>>>>             /**
>>>>>              * @parent: the fence returned by &drm_sched_backend_ops.run_job
>>>>>              * when scheduling the job on hardware. We signal the
>>>>> @@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
>>>>>                                    enum drm_sched_priority priority);
>>>>>     bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>>>>>
>>>>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
>>>>> +                             struct dma_fence *fence);
>>>>>     struct drm_sched_fence *drm_sched_fence_alloc(
>>>>>         struct drm_sched_entity *s_entity, void *owner);
>>>>>     void drm_sched_fence_init(struct drm_sched_fence *fence,
Rob Clark Sept. 22, 2021, 3:01 p.m. UTC | #12
On Wed, Sep 22, 2021 at 7:31 AM Andrey Grodzovsky
<andrey.grodzovsky@amd.com> wrote:
>
>
> On 2021-09-21 11:32 p.m., Rob Clark wrote:
> > On Tue, Sep 21, 2021 at 7:18 PM Andrey Grodzovsky
> > <andrey.grodzovsky@amd.com> wrote:
> >>
> >> On 2021-09-21 4:47 p.m., Rob Clark wrote:
> >>> On Tue, Sep 21, 2021 at 1:09 PM Andrey Grodzovsky
> >>> <andrey.grodzovsky@amd.com> wrote:
> >>>> On 2021-09-03 2:47 p.m., Rob Clark wrote:
> >>>>
> >>>>> From: Rob Clark <robdclark@chromium.org>
> >>>>>
> >>>>> As the finished fence is the one that is exposed to userspace, and
> >>>>> therefore the one that other operations, like atomic update, would
> >>>>> block on, we need to propagate the deadline from from the finished
> >>>>> fence to the actual hw fence.
> >>>>>
> >>>>> v2: Split into drm_sched_fence_set_parent() (ckoenig)
> >>>>>
> >>>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
> >>>>> ---
> >>>>>     drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++
> >>>>>     drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
> >>>>>     include/drm/gpu_scheduler.h             |  8 ++++++
> >>>>>     3 files changed, 43 insertions(+), 1 deletion(-)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> >>>>> index bcea035cf4c6..4fc41a71d1c7 100644
> >>>>> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> >>>>> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> >>>>> @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
> >>>>>         dma_fence_put(&fence->scheduled);
> >>>>>     }
> >>>>>
> >>>>> +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
> >>>>> +                                               ktime_t deadline)
> >>>>> +{
> >>>>> +     struct drm_sched_fence *fence = to_drm_sched_fence(f);
> >>>>> +     unsigned long flags;
> >>>>> +
> >>>>> +     spin_lock_irqsave(&fence->lock, flags);
> >>>>> +
> >>>>> +     /* If we already have an earlier deadline, keep it: */
> >>>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
> >>>>> +         ktime_before(fence->deadline, deadline)) {
> >>>>> +             spin_unlock_irqrestore(&fence->lock, flags);
> >>>>> +             return;
> >>>>> +     }
> >>>>> +
> >>>>> +     fence->deadline = deadline;
> >>>>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
> >>>>> +
> >>>>> +     spin_unlock_irqrestore(&fence->lock, flags);
> >>>>> +
> >>>>> +     if (fence->parent)
> >>>>> +             dma_fence_set_deadline(fence->parent, deadline);
> >>>>> +}
> >>>>> +
> >>>>>     static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
> >>>>>         .get_driver_name = drm_sched_fence_get_driver_name,
> >>>>>         .get_timeline_name = drm_sched_fence_get_timeline_name,
> >>>>> @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
> >>>>>         .get_driver_name = drm_sched_fence_get_driver_name,
> >>>>>         .get_timeline_name = drm_sched_fence_get_timeline_name,
> >>>>>         .release = drm_sched_fence_release_finished,
> >>>>> +     .set_deadline = drm_sched_fence_set_deadline_finished,
> >>>>>     };
> >>>>>
> >>>>>     struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> >>>>> @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
> >>>>>     }
> >>>>>     EXPORT_SYMBOL(to_drm_sched_fence);
> >>>>>
> >>>>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> >>>>> +                             struct dma_fence *fence)
> >>>>> +{
> >>>>> +     s_fence->parent = dma_fence_get(fence);
> >>>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> >>>>> +                  &s_fence->finished.flags))
> >>>>> +             dma_fence_set_deadline(fence, s_fence->deadline);
> >>>> I believe above you should pass be s_fence->finished to
> >>>> dma_fence_set_deadline
> >>>> instead it fence which is the HW fence itself.
> >>> Hmm, unless this has changed recently with some patches I don't have,
> >>> s_fence->parent is the one signalled by hw, so it is the one we want
> >>> to set the deadline on
> >>>
> >>> BR,
> >>> -R
> >>
> >> No it didn't change. But then when exactly will
> >> drm_sched_fence_set_deadline_finished
> >> execute such that fence->parent != NULL ? In other words, I am not clear
> >> how propagation
> >> happens otherwise - if dma_fence_set_deadline is called with the HW
> >> fence then the assumption
> >> here is that driver provided driver specific
> >> dma_fence_ops.dma_fence_set_deadline callback executes
> >> but I was under impression that drm_sched_fence_set_deadline_finished is
> >> the one that propagates
> >> the deadline to the HW fence's callback and for it to execute
> >> dma_fence_set_deadline needs to be called
> >> with s_fence->finished.
> > Assuming I didn't screw up drm/msm conversion to scheduler,
> > &s_fence->finished is the one that will be returned to userspace.. and
> > later passed back to kernel for atomic commit (or to the compositor).
> > So it is the one that fence->set_deadline() will be called on.  But
> > s_fence->parent is the actual hw fence that needs to know about the
> > deadline.  Depending on whether or not the job has been written into
> > hw ringbuffer or not, there are two cases:
> >
> > 1) not scheduled yet, s_fence will store the deadline and propagate it
> > later once s_fence->parent is known
>
>
> And by later you mean the call to drm_sched_fence_set_parent
> after HW fence is returned  ? If yes I think i get it now.

Yup :-)

BR,
-R

> Andrey
>
>
> > 2) already scheduled, in which case s_fence->finished.set_deadline
> > will propagate it directly to the real fence
> >
> > BR,
> > -R
> >
> >> Andrey
> >>
> >>
> >>
> >>>> Andrey
> >>>>
> >>>>
> >>>>> +}
> >>>>> +
> >>>>>     struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
> >>>>>                                               void *owner)
> >>>>>     {
> >>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> >>>>> index 595e47ff7d06..27bf0ac0625f 100644
> >>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
> >>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> >>>>> @@ -978,7 +978,7 @@ static int drm_sched_main(void *param)
> >>>>>                 drm_sched_fence_scheduled(s_fence);
> >>>>>
> >>>>>                 if (!IS_ERR_OR_NULL(fence)) {
> >>>>> -                     s_fence->parent = dma_fence_get(fence);
> >>>>> +                     drm_sched_fence_set_parent(s_fence, fence);
> >>>>>                         r = dma_fence_add_callback(fence, &sched_job->cb,
> >>>>>                                                    drm_sched_job_done_cb);
> >>>>>                         if (r == -ENOENT)
> >>>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> >>>>> index 7f77a455722c..158ddd662469 100644
> >>>>> --- a/include/drm/gpu_scheduler.h
> >>>>> +++ b/include/drm/gpu_scheduler.h
> >>>>> @@ -238,6 +238,12 @@ struct drm_sched_fence {
> >>>>>              */
> >>>>>         struct dma_fence                finished;
> >>>>>
> >>>>> +     /**
> >>>>> +      * @deadline: deadline set on &drm_sched_fence.finished which
> >>>>> +      * potentially needs to be propagated to &drm_sched_fence.parent
> >>>>> +      */
> >>>>> +     ktime_t                         deadline;
> >>>>> +
> >>>>>             /**
> >>>>>              * @parent: the fence returned by &drm_sched_backend_ops.run_job
> >>>>>              * when scheduling the job on hardware. We signal the
> >>>>> @@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
> >>>>>                                    enum drm_sched_priority priority);
> >>>>>     bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
> >>>>>
> >>>>> +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
> >>>>> +                             struct dma_fence *fence);
> >>>>>     struct drm_sched_fence *drm_sched_fence_alloc(
> >>>>>         struct drm_sched_entity *s_entity, void *owner);
> >>>>>     void drm_sched_fence_init(struct drm_sched_fence *fence,
diff mbox series

Patch

diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
index bcea035cf4c6..4fc41a71d1c7 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -128,6 +128,30 @@  static void drm_sched_fence_release_finished(struct dma_fence *f)
 	dma_fence_put(&fence->scheduled);
 }
 
+static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
+						  ktime_t deadline)
+{
+	struct drm_sched_fence *fence = to_drm_sched_fence(f);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fence->lock, flags);
+
+	/* If we already have an earlier deadline, keep it: */
+	if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
+	    ktime_before(fence->deadline, deadline)) {
+		spin_unlock_irqrestore(&fence->lock, flags);
+		return;
+	}
+
+	fence->deadline = deadline;
+	set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
+
+	spin_unlock_irqrestore(&fence->lock, flags);
+
+	if (fence->parent)
+		dma_fence_set_deadline(fence->parent, deadline);
+}
+
 static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
 	.get_driver_name = drm_sched_fence_get_driver_name,
 	.get_timeline_name = drm_sched_fence_get_timeline_name,
@@ -138,6 +162,7 @@  static const struct dma_fence_ops drm_sched_fence_ops_finished = {
 	.get_driver_name = drm_sched_fence_get_driver_name,
 	.get_timeline_name = drm_sched_fence_get_timeline_name,
 	.release = drm_sched_fence_release_finished,
+	.set_deadline = drm_sched_fence_set_deadline_finished,
 };
 
 struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
@@ -152,6 +177,15 @@  struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
 }
 EXPORT_SYMBOL(to_drm_sched_fence);
 
+void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
+				struct dma_fence *fence)
+{
+	s_fence->parent = dma_fence_get(fence);
+	if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
+		     &s_fence->finished.flags))
+		dma_fence_set_deadline(fence, s_fence->deadline);
+}
+
 struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
 					      void *owner)
 {
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 595e47ff7d06..27bf0ac0625f 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -978,7 +978,7 @@  static int drm_sched_main(void *param)
 		drm_sched_fence_scheduled(s_fence);
 
 		if (!IS_ERR_OR_NULL(fence)) {
-			s_fence->parent = dma_fence_get(fence);
+			drm_sched_fence_set_parent(s_fence, fence);
 			r = dma_fence_add_callback(fence, &sched_job->cb,
 						   drm_sched_job_done_cb);
 			if (r == -ENOENT)
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 7f77a455722c..158ddd662469 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -238,6 +238,12 @@  struct drm_sched_fence {
          */
 	struct dma_fence		finished;
 
+	/**
+	 * @deadline: deadline set on &drm_sched_fence.finished which
+	 * potentially needs to be propagated to &drm_sched_fence.parent
+	 */
+	ktime_t				deadline;
+
         /**
          * @parent: the fence returned by &drm_sched_backend_ops.run_job
          * when scheduling the job on hardware. We signal the
@@ -505,6 +511,8 @@  void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
 				   enum drm_sched_priority priority);
 bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
 
+void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
+				struct dma_fence *fence);
 struct drm_sched_fence *drm_sched_fence_alloc(
 	struct drm_sched_entity *s_entity, void *owner);
 void drm_sched_fence_init(struct drm_sched_fence *fence,