
[RFC,1/4] dma-fence: Add deadline awareness

Message ID 20210726233854.2453899-2-robdclark@gmail.com (mailing list archive)
State New, archived
Series dma-fence: Deadline awareness

Commit Message

Rob Clark July 26, 2021, 11:38 p.m. UTC
From: Rob Clark <robdclark@chromium.org>

Add a way to hint to the fence signaler of an upcoming deadline, such as
vblank, which the fence waiter would prefer not to miss.  This is to aid
the fence signaler in making power management decisions, like boosting
frequency as the deadline approaches, and to provide awareness of missed
deadlines so that they can be factored into the frequency scaling.

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/dma-buf/dma-fence.c | 39 +++++++++++++++++++++++++++++++++++++
 include/linux/dma-fence.h   | 17 ++++++++++++++++
 2 files changed, 56 insertions(+)
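
As a usage illustration (not part of the patch), a KMS driver's atomic
commit path could hint the upcoming vblank to the fences it is about to
wait on, roughly as sketched below.  The helper name is made up, not
existing drm core code, and how vblank_time is computed is elided:

#include <drm/drm_atomic.h>
#include <linux/dma-fence.h>

/* Illustration only: before waiting on the fences attached to the new
 * plane states, tell their signalers which vblank we are aiming for.
 */
static void example_commit_set_deadlines(struct drm_atomic_state *state,
					 ktime_t vblank_time)
{
	struct drm_plane *plane;
	struct drm_plane_state *new_state;
	int i;

	for_each_new_plane_in_state(state, plane, new_state, i) {
		if (new_state->fence)
			dma_fence_set_deadline(new_state->fence, vblank_time);
	}
}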

Comments

Christian König July 27, 2021, 7:11 a.m. UTC | #1
Am 27.07.21 um 01:38 schrieb Rob Clark:
> From: Rob Clark <robdclark@chromium.org>
>
> Add a way to hint to the fence signaler of an upcoming deadline, such as
> vblank, which the fence waiter would prefer not to miss.  This is to aid
> the fence signaler in making power management decisions, like boosting
> frequency as the deadline approaches and awareness of missing deadlines
> so that can be factored in to the frequency scaling.
>
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
>   drivers/dma-buf/dma-fence.c | 39 +++++++++++++++++++++++++++++++++++++
>   include/linux/dma-fence.h   | 17 ++++++++++++++++
>   2 files changed, 56 insertions(+)
>
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index ce0f5eff575d..2e0d25ab457e 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -910,6 +910,45 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
>   }
>   EXPORT_SYMBOL(dma_fence_wait_any_timeout);
>   
> +
> +/**
> + * dma_fence_set_deadline - set desired fence-wait deadline
> + * @fence:    the fence that is to be waited on
> + * @deadline: the time by which the waiter hopes for the fence to be
> + *            signaled
> + *
> + * Inform the fence signaler of an upcoming deadline, such as vblank, by
> + * which point the waiter would prefer the fence to be signaled.  This
> + * is intended to give feedback to the fence signaler to aid in power
> + * management decisions, such as boosting GPU frequency if a periodic
> + * vblank deadline is approaching.
> + */
> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
> +{
> +	unsigned long flags;
> +
> +	if (dma_fence_is_signaled(fence))
> +		return;
> +
> +	spin_lock_irqsave(fence->lock, flags);
> +
> +	/* If we already have an earlier deadline, keep it: */
> +	if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags) &&
> +	    ktime_before(fence->deadline, deadline)) {
> +		spin_unlock_irqrestore(fence->lock, flags);
> +		return;
> +	}
> +
> +	fence->deadline = deadline;
> +	set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags);
> +
> +	spin_unlock_irqrestore(fence->lock, flags);
> +
> +	if (fence->ops->set_deadline)
> +		fence->ops->set_deadline(fence, deadline);
> +}
> +EXPORT_SYMBOL(dma_fence_set_deadline);
> +
>   /**
>    * dma_fence_init - Initialize a custom fence.
>    * @fence: the fence to initialize
> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> index 6ffb4b2c6371..4e6cfe4e6fbc 100644
> --- a/include/linux/dma-fence.h
> +++ b/include/linux/dma-fence.h
> @@ -88,6 +88,7 @@ struct dma_fence {
>   		/* @timestamp replaced by @rcu on dma_fence_release() */
>   		struct rcu_head rcu;
>   	};
> +	ktime_t deadline;

Mhm, adding the flag sounds ok to me but I'm a bit hesitant to add the
deadline as an extra field here.

We tuned the dma_fence structure intentionally so that it is only 64 bytes.
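
For reference, that 64-byte budget roughly breaks down as follows on a
64-bit build (a sketch approximated from the mainline struct of this era;
exact field order and padding may differ):

struct dma_fence {
	spinlock_t *lock;			/*  8 bytes */
	const struct dma_fence_ops *ops;	/*  8 bytes */
	union {					/* 16 bytes */
		struct list_head cb_list;
		ktime_t timestamp;
		struct rcu_head rcu;
	};
	u64 context;				/*  8 bytes */
	u64 seqno;				/*  8 bytes */
	unsigned long flags;			/*  8 bytes */
	struct kref refcount;			/*  4 bytes */
	int error;				/*  4 bytes */
};						/* = 64 bytes; one more ktime_t
						 * would grow this to 72 and
						 * spill into a second cache
						 * line on most systems. */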

Regards,
Christian.

>   	u64 context;
>   	u64 seqno;
>   	unsigned long flags;
> @@ -99,6 +100,7 @@ enum dma_fence_flag_bits {
>   	DMA_FENCE_FLAG_SIGNALED_BIT,
>   	DMA_FENCE_FLAG_TIMESTAMP_BIT,
>   	DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> +	DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
>   	DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
>   };
>   
> @@ -261,6 +263,19 @@ struct dma_fence_ops {
>   	 */
>   	void (*timeline_value_str)(struct dma_fence *fence,
>   				   char *str, int size);
> +
> +	/**
> +	 * @set_deadline:
> +	 *
> +	 * Callback to allow a fence waiter to inform the fence signaler of an
> +	 * upcoming deadline, such as vblank, by which point the waiter would
> +	 * prefer the fence to be signaled.  This is intended to give feedback
> +	 * to the fence signaler to aid in power management decisions, such as
> +	 * boosting GPU frequency.
> +	 *
> +	 * This callback is optional.
> +	 */
> +	void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
>   };
>   
>   void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
> @@ -586,6 +601,8 @@ static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
>   	return ret < 0 ? ret : 0;
>   }
>   
> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline);
> +
>   struct dma_fence *dma_fence_get_stub(void);
>   struct dma_fence *dma_fence_allocate_private_stub(void);
>   u64 dma_fence_context_alloc(unsigned num);
Rob Clark July 27, 2021, 2:25 p.m. UTC | #2
On Tue, Jul 27, 2021 at 12:11 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 27.07.21 um 01:38 schrieb Rob Clark:
> > From: Rob Clark <robdclark@chromium.org>
> >
> > Add a way to hint to the fence signaler of an upcoming deadline, such as
> > vblank, which the fence waiter would prefer not to miss.  This is to aid
> > the fence signaler in making power management decisions, like boosting
> > frequency as the deadline approaches and awareness of missing deadlines
> > so that can be factored in to the frequency scaling.
> >
> > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > ---
> >   drivers/dma-buf/dma-fence.c | 39 +++++++++++++++++++++++++++++++++++++
> >   include/linux/dma-fence.h   | 17 ++++++++++++++++
> >   2 files changed, 56 insertions(+)
> >
> > diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> > index ce0f5eff575d..2e0d25ab457e 100644
> > --- a/drivers/dma-buf/dma-fence.c
> > +++ b/drivers/dma-buf/dma-fence.c
> > @@ -910,6 +910,45 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
> >   }
> >   EXPORT_SYMBOL(dma_fence_wait_any_timeout);
> >
> > +
> > +/**
> > + * dma_fence_set_deadline - set desired fence-wait deadline
> > + * @fence:    the fence that is to be waited on
> > + * @deadline: the time by which the waiter hopes for the fence to be
> > + *            signaled
> > + *
> > + * Inform the fence signaler of an upcoming deadline, such as vblank, by
> > + * which point the waiter would prefer the fence to be signaled by.  This
> > + * is intended to give feedback to the fence signaler to aid in power
> > + * management decisions, such as boosting GPU frequency if a periodic
> > + * vblank deadline is approaching.
> > + */
> > +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
> > +{
> > +     unsigned long flags;
> > +
> > +     if (dma_fence_is_signaled(fence))
> > +             return;
> > +
> > +     spin_lock_irqsave(fence->lock, flags);
> > +
> > +     /* If we already have an earlier deadline, keep it: */
> > +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags) &&
> > +         ktime_before(fence->deadline, deadline)) {
> > +             spin_unlock_irqrestore(fence->lock, flags);
> > +             return;
> > +     }
> > +
> > +     fence->deadline = deadline;
> > +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags);
> > +
> > +     spin_unlock_irqrestore(fence->lock, flags);
> > +
> > +     if (fence->ops->set_deadline)
> > +             fence->ops->set_deadline(fence, deadline);
> > +}
> > +EXPORT_SYMBOL(dma_fence_set_deadline);
> > +
> >   /**
> >    * dma_fence_init - Initialize a custom fence.
> >    * @fence: the fence to initialize
> > diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> > index 6ffb4b2c6371..4e6cfe4e6fbc 100644
> > --- a/include/linux/dma-fence.h
> > +++ b/include/linux/dma-fence.h
> > @@ -88,6 +88,7 @@ struct dma_fence {
> >               /* @timestamp replaced by @rcu on dma_fence_release() */
> >               struct rcu_head rcu;
> >       };
> > +     ktime_t deadline;
>
> Mhm, adding the flag sounds ok to me but I'm a bit hesitating adding the
> deadline as extra field here.
>
> We tuned the dma_fence structure intentionally so that it is only 64 bytes.

Hmm, then I guess you wouldn't be a fan of also adding an hrtimer?

We could push the ktime_t (and timer) down into the derived fence
class, but I think there is going to need to be some extra storage
*somewhere*.. maybe the fence signaler could get away with just
storing the nearest upcoming deadline per fence-context instead?

BR,
-R

> Regards,
> Christian.
>
> >       u64 context;
> >       u64 seqno;
> >       unsigned long flags;
> > @@ -99,6 +100,7 @@ enum dma_fence_flag_bits {
> >       DMA_FENCE_FLAG_SIGNALED_BIT,
> >       DMA_FENCE_FLAG_TIMESTAMP_BIT,
> >       DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> > +     DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> >       DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
> >   };
> >
> > @@ -261,6 +263,19 @@ struct dma_fence_ops {
> >        */
> >       void (*timeline_value_str)(struct dma_fence *fence,
> >                                  char *str, int size);
> > +
> > +     /**
> > +      * @set_deadline:
> > +      *
> > +      * Callback to allow a fence waiter to inform the fence signaler of an
> > +      * upcoming deadline, such as vblank, by which point the waiter would
> > +      * prefer the fence to be signaled by.  This is intended to give feedback
> > +      * to the fence signaler to aid in power management decisions, such as
> > +      * boosting GPU frequency.
> > +      *
> > +      * This callback is optional.
> > +      */
> > +     void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
> >   };
> >
> >   void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
> > @@ -586,6 +601,8 @@ static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
> >       return ret < 0 ? ret : 0;
> >   }
> >
> > +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline);
> > +
> >   struct dma_fence *dma_fence_get_stub(void);
> >   struct dma_fence *dma_fence_allocate_private_stub(void);
> >   u64 dma_fence_context_alloc(unsigned num);
>
Christian König July 28, 2021, 7:03 a.m. UTC | #3
Am 27.07.21 um 16:25 schrieb Rob Clark:
> On Tue, Jul 27, 2021 at 12:11 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 27.07.21 um 01:38 schrieb Rob Clark:
>>> From: Rob Clark <robdclark@chromium.org>
>>>
>>> Add a way to hint to the fence signaler of an upcoming deadline, such as
>>> vblank, which the fence waiter would prefer not to miss.  This is to aid
>>> the fence signaler in making power management decisions, like boosting
>>> frequency as the deadline approaches and awareness of missing deadlines
>>> so that can be factored in to the frequency scaling.
>>>
>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
>>> ---
>>>    drivers/dma-buf/dma-fence.c | 39 +++++++++++++++++++++++++++++++++++++
>>>    include/linux/dma-fence.h   | 17 ++++++++++++++++
>>>    2 files changed, 56 insertions(+)
>>>
>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
>>> index ce0f5eff575d..2e0d25ab457e 100644
>>> --- a/drivers/dma-buf/dma-fence.c
>>> +++ b/drivers/dma-buf/dma-fence.c
>>> @@ -910,6 +910,45 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
>>>    }
>>>    EXPORT_SYMBOL(dma_fence_wait_any_timeout);
>>>
>>> +
>>> +/**
>>> + * dma_fence_set_deadline - set desired fence-wait deadline
>>> + * @fence:    the fence that is to be waited on
>>> + * @deadline: the time by which the waiter hopes for the fence to be
>>> + *            signaled
>>> + *
>>> + * Inform the fence signaler of an upcoming deadline, such as vblank, by
>>> + * which point the waiter would prefer the fence to be signaled by.  This
>>> + * is intended to give feedback to the fence signaler to aid in power
>>> + * management decisions, such as boosting GPU frequency if a periodic
>>> + * vblank deadline is approaching.
>>> + */
>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
>>> +{
>>> +     unsigned long flags;
>>> +
>>> +     if (dma_fence_is_signaled(fence))
>>> +             return;
>>> +
>>> +     spin_lock_irqsave(fence->lock, flags);
>>> +
>>> +     /* If we already have an earlier deadline, keep it: */
>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags) &&
>>> +         ktime_before(fence->deadline, deadline)) {
>>> +             spin_unlock_irqrestore(fence->lock, flags);
>>> +             return;
>>> +     }
>>> +
>>> +     fence->deadline = deadline;
>>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags);
>>> +
>>> +     spin_unlock_irqrestore(fence->lock, flags);
>>> +
>>> +     if (fence->ops->set_deadline)
>>> +             fence->ops->set_deadline(fence, deadline);
>>> +}
>>> +EXPORT_SYMBOL(dma_fence_set_deadline);
>>> +
>>>    /**
>>>     * dma_fence_init - Initialize a custom fence.
>>>     * @fence: the fence to initialize
>>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
>>> index 6ffb4b2c6371..4e6cfe4e6fbc 100644
>>> --- a/include/linux/dma-fence.h
>>> +++ b/include/linux/dma-fence.h
>>> @@ -88,6 +88,7 @@ struct dma_fence {
>>>                /* @timestamp replaced by @rcu on dma_fence_release() */
>>>                struct rcu_head rcu;
>>>        };
>>> +     ktime_t deadline;
>> Mhm, adding the flag sounds ok to me but I'm a bit hesitating adding the
>> deadline as extra field here.
>>
>> We tuned the dma_fence structure intentionally so that it is only 64 bytes.
> Hmm, then I guess you wouldn't be a fan of also adding an hrtimer?
>
> We could push the ktime_t (and timer) down into the derived fence
> class, but I think there is going to need to be some extra storage
> *somewhere*.. maybe the fence signaler could get away with just
> storing the nearest upcoming deadline per fence-context instead?

I would just push that into the driver instead.

You most likely don't want the deadline per fence anyway in complex
scenarios, but rather per frame. And a frame is usually composed of
multiple fences.
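
A minimal sketch of that per-frame approach, with all names made up for
illustration: the driver keeps one deadline per in-flight frame and folds
in the earliest hint reported against any of the fences composing the
frame:

#include <linux/ktime.h>
#include <linux/spinlock.h>

struct example_frame {
	spinlock_t lock;
	ktime_t deadline;		/* earliest hint seen so far */
	bool has_deadline;
};

static void example_frame_note_deadline(struct example_frame *frame,
					ktime_t deadline)
{
	unsigned long flags;

	spin_lock_irqsave(&frame->lock, flags);
	if (!frame->has_deadline || ktime_before(deadline, frame->deadline)) {
		frame->deadline = deadline;
		frame->has_deadline = true;
		/* re-evaluate devfreq/clock targets for this frame here */
	}
	spin_unlock_irqrestore(&frame->lock, flags);
}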

Regards,
Christian.

>
> BR,
> -R
>
>> Regards,
>> Christian.
>>
>>>        u64 context;
>>>        u64 seqno;
>>>        unsigned long flags;
>>> @@ -99,6 +100,7 @@ enum dma_fence_flag_bits {
>>>        DMA_FENCE_FLAG_SIGNALED_BIT,
>>>        DMA_FENCE_FLAG_TIMESTAMP_BIT,
>>>        DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
>>> +     DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
>>>        DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
>>>    };
>>>
>>> @@ -261,6 +263,19 @@ struct dma_fence_ops {
>>>         */
>>>        void (*timeline_value_str)(struct dma_fence *fence,
>>>                                   char *str, int size);
>>> +
>>> +     /**
>>> +      * @set_deadline:
>>> +      *
>>> +      * Callback to allow a fence waiter to inform the fence signaler of an
>>> +      * upcoming deadline, such as vblank, by which point the waiter would
>>> +      * prefer the fence to be signaled by.  This is intended to give feedback
>>> +      * to the fence signaler to aid in power management decisions, such as
>>> +      * boosting GPU frequency.
>>> +      *
>>> +      * This callback is optional.
>>> +      */
>>> +     void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
>>>    };
>>>
>>>    void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
>>> @@ -586,6 +601,8 @@ static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
>>>        return ret < 0 ? ret : 0;
>>>    }
>>>
>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline);
>>> +
>>>    struct dma_fence *dma_fence_get_stub(void);
>>>    struct dma_fence *dma_fence_allocate_private_stub(void);
>>>    u64 dma_fence_context_alloc(unsigned num);
Christian König July 28, 2021, 11:37 a.m. UTC | #4
Am 28.07.21 um 09:03 schrieb Christian König:
> Am 27.07.21 um 16:25 schrieb Rob Clark:
>> On Tue, Jul 27, 2021 at 12:11 AM Christian König
>> <ckoenig.leichtzumerken@gmail.com> wrote:
>>> Am 27.07.21 um 01:38 schrieb Rob Clark:
>>>> From: Rob Clark <robdclark@chromium.org>
>>>>
>>>> Add a way to hint to the fence signaler of an upcoming deadline, 
>>>> such as
>>>> vblank, which the fence waiter would prefer not to miss. This is to 
>>>> aid
>>>> the fence signaler in making power management decisions, like boosting
>>>> frequency as the deadline approaches and awareness of missing 
>>>> deadlines
>>>> so that can be factored in to the frequency scaling.
>>>>
>>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
>>>> ---
>>>>    drivers/dma-buf/dma-fence.c | 39 
>>>> +++++++++++++++++++++++++++++++++++++
>>>>    include/linux/dma-fence.h   | 17 ++++++++++++++++
>>>>    2 files changed, 56 insertions(+)
>>>>
>>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
>>>> index ce0f5eff575d..2e0d25ab457e 100644
>>>> --- a/drivers/dma-buf/dma-fence.c
>>>> +++ b/drivers/dma-buf/dma-fence.c
>>>> @@ -910,6 +910,45 @@ dma_fence_wait_any_timeout(struct dma_fence 
>>>> **fences, uint32_t count,
>>>>    }
>>>>    EXPORT_SYMBOL(dma_fence_wait_any_timeout);
>>>>
>>>> +
>>>> +/**
>>>> + * dma_fence_set_deadline - set desired fence-wait deadline
>>>> + * @fence:    the fence that is to be waited on
>>>> + * @deadline: the time by which the waiter hopes for the fence to be
>>>> + *            signaled
>>>> + *
>>>> + * Inform the fence signaler of an upcoming deadline, such as 
>>>> vblank, by
>>>> + * which point the waiter would prefer the fence to be signaled 
>>>> by.  This
>>>> + * is intended to give feedback to the fence signaler to aid in power
>>>> + * management decisions, such as boosting GPU frequency if a periodic
>>>> + * vblank deadline is approaching.
>>>> + */
>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t 
>>>> deadline)
>>>> +{
>>>> +     unsigned long flags;
>>>> +
>>>> +     if (dma_fence_is_signaled(fence))
>>>> +             return;
>>>> +
>>>> +     spin_lock_irqsave(fence->lock, flags);
>>>> +
>>>> +     /* If we already have an earlier deadline, keep it: */
>>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags) &&
>>>> +         ktime_before(fence->deadline, deadline)) {
>>>> +             spin_unlock_irqrestore(fence->lock, flags);
>>>> +             return;
>>>> +     }
>>>> +
>>>> +     fence->deadline = deadline;
>>>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags);
>>>> +
>>>> +     spin_unlock_irqrestore(fence->lock, flags);
>>>> +
>>>> +     if (fence->ops->set_deadline)
>>>> +             fence->ops->set_deadline(fence, deadline);
>>>> +}
>>>> +EXPORT_SYMBOL(dma_fence_set_deadline);
>>>> +
>>>>    /**
>>>>     * dma_fence_init - Initialize a custom fence.
>>>>     * @fence: the fence to initialize
>>>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
>>>> index 6ffb4b2c6371..4e6cfe4e6fbc 100644
>>>> --- a/include/linux/dma-fence.h
>>>> +++ b/include/linux/dma-fence.h
>>>> @@ -88,6 +88,7 @@ struct dma_fence {
>>>>                /* @timestamp replaced by @rcu on 
>>>> dma_fence_release() */
>>>>                struct rcu_head rcu;
>>>>        };
>>>> +     ktime_t deadline;
>>> Mhm, adding the flag sounds ok to me but I'm a bit hesitating adding 
>>> the
>>> deadline as extra field here.
>>>
>>> We tuned the dma_fence structure intentionally so that it is only 64 
>>> bytes.
>> Hmm, then I guess you wouldn't be a fan of also adding an hrtimer?
>>
>> We could push the ktime_t (and timer) down into the derived fence
>> class, but I think there is going to need to be some extra storage
>> *somewhere*.. maybe the fence signaler could get away with just
>> storing the nearest upcoming deadline per fence-context instead?
>
> I would just push that into the driver instead.
>
> You most likely don't want the deadline per fence anyway in complex 
> scenarios, but rather per frame. And a frame is usually composed from 
> multiple fences.

Thinking more about it, we could probably kill the spinlock pointer and
make the flags 32-bit if we absolutely need that here.
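
Purely as byte accounting (hypothetical, not a worked-out proposal, and
where the lock would then live is left open): dropping the spinlock
pointer and shrinking flags to 32 bits would free enough room for an
8-byte deadline while staying at 64 bytes:

struct dma_fence_repacked {
	const struct dma_fence_ops *ops;	/*  8 */
	union {					/* 16 */
		struct list_head cb_list;
		ktime_t timestamp;
		struct rcu_head rcu;
	};
	u64 context;				/*  8 */
	u64 seqno;				/*  8 */
	u32 flags;				/*  4 */
	struct kref refcount;			/*  4 */
	int error;				/*  4 */
	ktime_t deadline;			/*  8 */
};						/* = 64 bytes, including 4
						 * bytes of padding before
						 * the deadline */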

But I still don't see the need for that, especially since most drivers 
probably won't implement it.

Regards,
Christian.

>
> Regards,
> Christian.
>
>>
>> BR,
>> -R
>>
>>> Regards,
>>> Christian.
>>>
>>>>        u64 context;
>>>>        u64 seqno;
>>>>        unsigned long flags;
>>>> @@ -99,6 +100,7 @@ enum dma_fence_flag_bits {
>>>>        DMA_FENCE_FLAG_SIGNALED_BIT,
>>>>        DMA_FENCE_FLAG_TIMESTAMP_BIT,
>>>>        DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
>>>> +     DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
>>>>        DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
>>>>    };
>>>>
>>>> @@ -261,6 +263,19 @@ struct dma_fence_ops {
>>>>         */
>>>>        void (*timeline_value_str)(struct dma_fence *fence,
>>>>                                   char *str, int size);
>>>> +
>>>> +     /**
>>>> +      * @set_deadline:
>>>> +      *
>>>> +      * Callback to allow a fence waiter to inform the fence 
>>>> signaler of an
>>>> +      * upcoming deadline, such as vblank, by which point the 
>>>> waiter would
>>>> +      * prefer the fence to be signaled by.  This is intended to 
>>>> give feedback
>>>> +      * to the fence signaler to aid in power management 
>>>> decisions, such as
>>>> +      * boosting GPU frequency.
>>>> +      *
>>>> +      * This callback is optional.
>>>> +      */
>>>> +     void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
>>>>    };
>>>>
>>>>    void dma_fence_init(struct dma_fence *fence, const struct 
>>>> dma_fence_ops *ops,
>>>> @@ -586,6 +601,8 @@ static inline signed long dma_fence_wait(struct 
>>>> dma_fence *fence, bool intr)
>>>>        return ret < 0 ? ret : 0;
>>>>    }
>>>>
>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t 
>>>> deadline);
>>>> +
>>>>    struct dma_fence *dma_fence_get_stub(void);
>>>>    struct dma_fence *dma_fence_allocate_private_stub(void);
>>>>    u64 dma_fence_context_alloc(unsigned num);
>
Rob Clark July 28, 2021, 3:15 p.m. UTC | #5
On Wed, Jul 28, 2021 at 4:37 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 28.07.21 um 09:03 schrieb Christian König:
> > Am 27.07.21 um 16:25 schrieb Rob Clark:
> >> On Tue, Jul 27, 2021 at 12:11 AM Christian König
> >> <ckoenig.leichtzumerken@gmail.com> wrote:
> >>> Am 27.07.21 um 01:38 schrieb Rob Clark:
> >>>> From: Rob Clark <robdclark@chromium.org>
> >>>>
> >>>> Add a way to hint to the fence signaler of an upcoming deadline,
> >>>> such as
> >>>> vblank, which the fence waiter would prefer not to miss. This is to
> >>>> aid
> >>>> the fence signaler in making power management decisions, like boosting
> >>>> frequency as the deadline approaches and awareness of missing
> >>>> deadlines
> >>>> so that can be factored in to the frequency scaling.
> >>>>
> >>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
> >>>> ---
> >>>>    drivers/dma-buf/dma-fence.c | 39
> >>>> +++++++++++++++++++++++++++++++++++++
> >>>>    include/linux/dma-fence.h   | 17 ++++++++++++++++
> >>>>    2 files changed, 56 insertions(+)
> >>>>
> >>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> >>>> index ce0f5eff575d..2e0d25ab457e 100644
> >>>> --- a/drivers/dma-buf/dma-fence.c
> >>>> +++ b/drivers/dma-buf/dma-fence.c
> >>>> @@ -910,6 +910,45 @@ dma_fence_wait_any_timeout(struct dma_fence
> >>>> **fences, uint32_t count,
> >>>>    }
> >>>>    EXPORT_SYMBOL(dma_fence_wait_any_timeout);
> >>>>
> >>>> +
> >>>> +/**
> >>>> + * dma_fence_set_deadline - set desired fence-wait deadline
> >>>> + * @fence:    the fence that is to be waited on
> >>>> + * @deadline: the time by which the waiter hopes for the fence to be
> >>>> + *            signaled
> >>>> + *
> >>>> + * Inform the fence signaler of an upcoming deadline, such as
> >>>> vblank, by
> >>>> + * which point the waiter would prefer the fence to be signaled
> >>>> by.  This
> >>>> + * is intended to give feedback to the fence signaler to aid in power
> >>>> + * management decisions, such as boosting GPU frequency if a periodic
> >>>> + * vblank deadline is approaching.
> >>>> + */
> >>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
> >>>> deadline)
> >>>> +{
> >>>> +     unsigned long flags;
> >>>> +
> >>>> +     if (dma_fence_is_signaled(fence))
> >>>> +             return;
> >>>> +
> >>>> +     spin_lock_irqsave(fence->lock, flags);
> >>>> +
> >>>> +     /* If we already have an earlier deadline, keep it: */
> >>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags) &&
> >>>> +         ktime_before(fence->deadline, deadline)) {
> >>>> +             spin_unlock_irqrestore(fence->lock, flags);
> >>>> +             return;
> >>>> +     }
> >>>> +
> >>>> +     fence->deadline = deadline;
> >>>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags);
> >>>> +
> >>>> +     spin_unlock_irqrestore(fence->lock, flags);
> >>>> +
> >>>> +     if (fence->ops->set_deadline)
> >>>> +             fence->ops->set_deadline(fence, deadline);
> >>>> +}
> >>>> +EXPORT_SYMBOL(dma_fence_set_deadline);
> >>>> +
> >>>>    /**
> >>>>     * dma_fence_init - Initialize a custom fence.
> >>>>     * @fence: the fence to initialize
> >>>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> >>>> index 6ffb4b2c6371..4e6cfe4e6fbc 100644
> >>>> --- a/include/linux/dma-fence.h
> >>>> +++ b/include/linux/dma-fence.h
> >>>> @@ -88,6 +88,7 @@ struct dma_fence {
> >>>>                /* @timestamp replaced by @rcu on
> >>>> dma_fence_release() */
> >>>>                struct rcu_head rcu;
> >>>>        };
> >>>> +     ktime_t deadline;
> >>> Mhm, adding the flag sounds ok to me but I'm a bit hesitating adding
> >>> the
> >>> deadline as extra field here.
> >>>
> >>> We tuned the dma_fence structure intentionally so that it is only 64
> >>> bytes.
> >> Hmm, then I guess you wouldn't be a fan of also adding an hrtimer?
> >>
> >> We could push the ktime_t (and timer) down into the derived fence
> >> class, but I think there is going to need to be some extra storage
> >> *somewhere*.. maybe the fence signaler could get away with just
> >> storing the nearest upcoming deadline per fence-context instead?
> >
> > I would just push that into the driver instead.
> >
> > You most likely don't want the deadline per fence anyway in complex
> > scenarios, but rather per frame. And a frame is usually composed from
> > multiple fences.

Right, I ended up keeping track of the nearest deadline in patch 5/4
which added drm/msm support:

  https://patchwork.freedesktop.org/patch/447138/

But if we do have the ktime_t in dma_fence, we can add
some checks and avoid calling back to the driver if a later deadline
is set on a fence that already has an earlier deadline.  OTOH I
suppose I can push all that back to the driver to start, and we can
revisit once we have more drivers implementing deadline support.
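
A sketch of the shape such a driver-side hook could take, tracking only
the earliest pending deadline per ring and arming a timer to boost clocks
shortly before it.  All names below are hypothetical; the actual drm/msm
code is in the patch linked above:

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/spinlock.h>

struct example_ring {
	spinlock_t lock;
	ktime_t next_deadline;
	bool has_deadline;
	struct hrtimer boost_timer;	/* its callback would bump devfreq */
};

static void example_ring_set_deadline(struct example_ring *ring,
				      ktime_t deadline)
{
	unsigned long flags;

	spin_lock_irqsave(&ring->lock, flags);
	if (!ring->has_deadline || ktime_before(deadline, ring->next_deadline)) {
		ring->next_deadline = deadline;
		ring->has_deadline = true;
		/* wake up a little early so there is time to clock up */
		hrtimer_start(&ring->boost_timer,
			      ktime_sub(deadline, ms_to_ktime(3)),
			      HRTIMER_MODE_ABS);
	}
	spin_unlock_irqrestore(&ring->lock, flags);
}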

>
> Thinking more about it we could probably kill the spinlock pointer and
> make the flags 32bit if we absolutely need that here.

If we had a 'struct dma_fence_context' we could push the spinlock, ops
pointer, and u64 context into that and replace with a single
dma_fence_context ptr, fwiw
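
A very rough sketch of what such a split could look like (hypothetical,
not existing kernel API; field choices are purely illustrative):

struct dma_fence_context {
	spinlock_t *lock;
	const struct dma_fence_ops *ops;
	u64 context;			/* timeline id */
};

/* Each fence then carries one pointer instead of lock/ops/context... */
struct dma_fence_slim {
	struct dma_fence_context *ctx;	/* replaces three fields: net -16 bytes */
	union {
		struct list_head cb_list;
		ktime_t timestamp;
		struct rcu_head rcu;
	};
	u64 seqno;
	unsigned long flags;
	struct kref refcount;
	int error;
	ktime_t deadline;		/* ...leaving room for a deadline */
};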

BR,
-R

> But I still don't see the need for that, especially since most drivers
> probably won't implement it.
>
> Regards,
> Christian.
>
> >
> > Regards,
> > Christian.
> >
> >>
> >> BR,
> >> -R
> >>
> >>> Regards,
> >>> Christian.
> >>>
> >>>>        u64 context;
> >>>>        u64 seqno;
> >>>>        unsigned long flags;
> >>>> @@ -99,6 +100,7 @@ enum dma_fence_flag_bits {
> >>>>        DMA_FENCE_FLAG_SIGNALED_BIT,
> >>>>        DMA_FENCE_FLAG_TIMESTAMP_BIT,
> >>>>        DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> >>>> +     DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> >>>>        DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
> >>>>    };
> >>>>
> >>>> @@ -261,6 +263,19 @@ struct dma_fence_ops {
> >>>>         */
> >>>>        void (*timeline_value_str)(struct dma_fence *fence,
> >>>>                                   char *str, int size);
> >>>> +
> >>>> +     /**
> >>>> +      * @set_deadline:
> >>>> +      *
> >>>> +      * Callback to allow a fence waiter to inform the fence
> >>>> signaler of an
> >>>> +      * upcoming deadline, such as vblank, by which point the
> >>>> waiter would
> >>>> +      * prefer the fence to be signaled by.  This is intended to
> >>>> give feedback
> >>>> +      * to the fence signaler to aid in power management
> >>>> decisions, such as
> >>>> +      * boosting GPU frequency.
> >>>> +      *
> >>>> +      * This callback is optional.
> >>>> +      */
> >>>> +     void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
> >>>>    };
> >>>>
> >>>>    void dma_fence_init(struct dma_fence *fence, const struct
> >>>> dma_fence_ops *ops,
> >>>> @@ -586,6 +601,8 @@ static inline signed long dma_fence_wait(struct
> >>>> dma_fence *fence, bool intr)
> >>>>        return ret < 0 ? ret : 0;
> >>>>    }
> >>>>
> >>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
> >>>> deadline);
> >>>> +
> >>>>    struct dma_fence *dma_fence_get_stub(void);
> >>>>    struct dma_fence *dma_fence_allocate_private_stub(void);
> >>>>    u64 dma_fence_context_alloc(unsigned num);
> >
>
Christian König July 28, 2021, 5:23 p.m. UTC | #6
Am 28.07.21 um 17:15 schrieb Rob Clark:
> On Wed, Jul 28, 2021 at 4:37 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 28.07.21 um 09:03 schrieb Christian König:
>>> Am 27.07.21 um 16:25 schrieb Rob Clark:
>>>> On Tue, Jul 27, 2021 at 12:11 AM Christian König
>>>> <ckoenig.leichtzumerken@gmail.com> wrote:
>>>>> Am 27.07.21 um 01:38 schrieb Rob Clark:
>>>>>> From: Rob Clark <robdclark@chromium.org>
>>>>>>
>>>>>> Add a way to hint to the fence signaler of an upcoming deadline,
>>>>>> such as
>>>>>> vblank, which the fence waiter would prefer not to miss. This is to
>>>>>> aid
>>>>>> the fence signaler in making power management decisions, like boosting
>>>>>> frequency as the deadline approaches and awareness of missing
>>>>>> deadlines
>>>>>> so that can be factored in to the frequency scaling.
>>>>>>
>>>>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
>>>>>> ---
>>>>>>     drivers/dma-buf/dma-fence.c | 39
>>>>>> +++++++++++++++++++++++++++++++++++++
>>>>>>     include/linux/dma-fence.h   | 17 ++++++++++++++++
>>>>>>     2 files changed, 56 insertions(+)
>>>>>>
>>>>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
>>>>>> index ce0f5eff575d..2e0d25ab457e 100644
>>>>>> --- a/drivers/dma-buf/dma-fence.c
>>>>>> +++ b/drivers/dma-buf/dma-fence.c
>>>>>> @@ -910,6 +910,45 @@ dma_fence_wait_any_timeout(struct dma_fence
>>>>>> **fences, uint32_t count,
>>>>>>     }
>>>>>>     EXPORT_SYMBOL(dma_fence_wait_any_timeout);
>>>>>>
>>>>>> +
>>>>>> +/**
>>>>>> + * dma_fence_set_deadline - set desired fence-wait deadline
>>>>>> + * @fence:    the fence that is to be waited on
>>>>>> + * @deadline: the time by which the waiter hopes for the fence to be
>>>>>> + *            signaled
>>>>>> + *
>>>>>> + * Inform the fence signaler of an upcoming deadline, such as
>>>>>> vblank, by
>>>>>> + * which point the waiter would prefer the fence to be signaled
>>>>>> by.  This
>>>>>> + * is intended to give feedback to the fence signaler to aid in power
>>>>>> + * management decisions, such as boosting GPU frequency if a periodic
>>>>>> + * vblank deadline is approaching.
>>>>>> + */
>>>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
>>>>>> deadline)
>>>>>> +{
>>>>>> +     unsigned long flags;
>>>>>> +
>>>>>> +     if (dma_fence_is_signaled(fence))
>>>>>> +             return;
>>>>>> +
>>>>>> +     spin_lock_irqsave(fence->lock, flags);
>>>>>> +
>>>>>> +     /* If we already have an earlier deadline, keep it: */
>>>>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags) &&
>>>>>> +         ktime_before(fence->deadline, deadline)) {
>>>>>> +             spin_unlock_irqrestore(fence->lock, flags);
>>>>>> +             return;
>>>>>> +     }
>>>>>> +
>>>>>> +     fence->deadline = deadline;
>>>>>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags);
>>>>>> +
>>>>>> +     spin_unlock_irqrestore(fence->lock, flags);
>>>>>> +
>>>>>> +     if (fence->ops->set_deadline)
>>>>>> +             fence->ops->set_deadline(fence, deadline);
>>>>>> +}
>>>>>> +EXPORT_SYMBOL(dma_fence_set_deadline);
>>>>>> +
>>>>>>     /**
>>>>>>      * dma_fence_init - Initialize a custom fence.
>>>>>>      * @fence: the fence to initialize
>>>>>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
>>>>>> index 6ffb4b2c6371..4e6cfe4e6fbc 100644
>>>>>> --- a/include/linux/dma-fence.h
>>>>>> +++ b/include/linux/dma-fence.h
>>>>>> @@ -88,6 +88,7 @@ struct dma_fence {
>>>>>>                 /* @timestamp replaced by @rcu on
>>>>>> dma_fence_release() */
>>>>>>                 struct rcu_head rcu;
>>>>>>         };
>>>>>> +     ktime_t deadline;
>>>>> Mhm, adding the flag sounds ok to me but I'm a bit hesitating adding
>>>>> the
>>>>> deadline as extra field here.
>>>>>
>>>>> We tuned the dma_fence structure intentionally so that it is only 64
>>>>> bytes.
>>>> Hmm, then I guess you wouldn't be a fan of also adding an hrtimer?
>>>>
>>>> We could push the ktime_t (and timer) down into the derived fence
>>>> class, but I think there is going to need to be some extra storage
>>>> *somewhere*.. maybe the fence signaler could get away with just
>>>> storing the nearest upcoming deadline per fence-context instead?
>>> I would just push that into the driver instead.
>>>
>>> You most likely don't want the deadline per fence anyway in complex
>>> scenarios, but rather per frame. And a frame is usually composed from
>>> multiple fences.
> Right, I ended up keeping track of the nearest deadline in patch 5/4
> which added drm/msm support:
>
>    https://patchwork.freedesktop.org/patch/447138/
>
> But if we do have the ktime_t in dma_fence in dma_fence, we can add
> some checks and avoid calling back to the driver if a later deadline
> is set on a fence that already has an earlier deadline.  OTOH I
> suppose I can push all that back to the driver to start, and we can
> revisit once we have more drivers implementing deadline support.

I still think that all of this is rather specific to your use case, and I
have strong doubts that anybody else will implement it.

>> Thinking more about it we could probably kill the spinlock pointer and
>> make the flags 32bit if we absolutely need that here.
> If we had a 'struct dma_fence_context' we could push the spinlock, ops
> pointer, and u64 context into that and replace with a single
> dma_fence_context ptr, fwiw

That won't work. We have a lot of use cases where you can't allocate 
memory, but must allocate a context.

Christian.

>
> BR,
> -R
>
>> But I still don't see the need for that, especially since most drivers
>> probably won't implement it.
>>
>> Regards,
>> Christian.
>>
>>> Regards,
>>> Christian.
>>>
>>>> BR,
>>>> -R
>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>>         u64 context;
>>>>>>         u64 seqno;
>>>>>>         unsigned long flags;
>>>>>> @@ -99,6 +100,7 @@ enum dma_fence_flag_bits {
>>>>>>         DMA_FENCE_FLAG_SIGNALED_BIT,
>>>>>>         DMA_FENCE_FLAG_TIMESTAMP_BIT,
>>>>>>         DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
>>>>>> +     DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
>>>>>>         DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
>>>>>>     };
>>>>>>
>>>>>> @@ -261,6 +263,19 @@ struct dma_fence_ops {
>>>>>>          */
>>>>>>         void (*timeline_value_str)(struct dma_fence *fence,
>>>>>>                                    char *str, int size);
>>>>>> +
>>>>>> +     /**
>>>>>> +      * @set_deadline:
>>>>>> +      *
>>>>>> +      * Callback to allow a fence waiter to inform the fence
>>>>>> signaler of an
>>>>>> +      * upcoming deadline, such as vblank, by which point the
>>>>>> waiter would
>>>>>> +      * prefer the fence to be signaled by.  This is intended to
>>>>>> give feedback
>>>>>> +      * to the fence signaler to aid in power management
>>>>>> decisions, such as
>>>>>> +      * boosting GPU frequency.
>>>>>> +      *
>>>>>> +      * This callback is optional.
>>>>>> +      */
>>>>>> +     void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
>>>>>>     };
>>>>>>
>>>>>>     void dma_fence_init(struct dma_fence *fence, const struct
>>>>>> dma_fence_ops *ops,
>>>>>> @@ -586,6 +601,8 @@ static inline signed long dma_fence_wait(struct
>>>>>> dma_fence *fence, bool intr)
>>>>>>         return ret < 0 ? ret : 0;
>>>>>>     }
>>>>>>
>>>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
>>>>>> deadline);
>>>>>> +
>>>>>>     struct dma_fence *dma_fence_get_stub(void);
>>>>>>     struct dma_fence *dma_fence_allocate_private_stub(void);
>>>>>>     u64 dma_fence_context_alloc(unsigned num);
Rob Clark July 28, 2021, 5:58 p.m. UTC | #7
On Wed, Jul 28, 2021 at 10:23 AM Christian König
<christian.koenig@amd.com> wrote:
>
>
>
> Am 28.07.21 um 17:15 schrieb Rob Clark:
> > On Wed, Jul 28, 2021 at 4:37 AM Christian König
> > <ckoenig.leichtzumerken@gmail.com> wrote:
> >> Am 28.07.21 um 09:03 schrieb Christian König:
> >>> Am 27.07.21 um 16:25 schrieb Rob Clark:
> >>>> On Tue, Jul 27, 2021 at 12:11 AM Christian König
> >>>> <ckoenig.leichtzumerken@gmail.com> wrote:
> >>>>> Am 27.07.21 um 01:38 schrieb Rob Clark:
> >>>>>> From: Rob Clark <robdclark@chromium.org>
> >>>>>>
> >>>>>> Add a way to hint to the fence signaler of an upcoming deadline,
> >>>>>> such as
> >>>>>> vblank, which the fence waiter would prefer not to miss. This is to
> >>>>>> aid
> >>>>>> the fence signaler in making power management decisions, like boosting
> >>>>>> frequency as the deadline approaches and awareness of missing
> >>>>>> deadlines
> >>>>>> so that can be factored in to the frequency scaling.
> >>>>>>
> >>>>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
> >>>>>> ---
> >>>>>>     drivers/dma-buf/dma-fence.c | 39
> >>>>>> +++++++++++++++++++++++++++++++++++++
> >>>>>>     include/linux/dma-fence.h   | 17 ++++++++++++++++
> >>>>>>     2 files changed, 56 insertions(+)
> >>>>>>
> >>>>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> >>>>>> index ce0f5eff575d..2e0d25ab457e 100644
> >>>>>> --- a/drivers/dma-buf/dma-fence.c
> >>>>>> +++ b/drivers/dma-buf/dma-fence.c
> >>>>>> @@ -910,6 +910,45 @@ dma_fence_wait_any_timeout(struct dma_fence
> >>>>>> **fences, uint32_t count,
> >>>>>>     }
> >>>>>>     EXPORT_SYMBOL(dma_fence_wait_any_timeout);
> >>>>>>
> >>>>>> +
> >>>>>> +/**
> >>>>>> + * dma_fence_set_deadline - set desired fence-wait deadline
> >>>>>> + * @fence:    the fence that is to be waited on
> >>>>>> + * @deadline: the time by which the waiter hopes for the fence to be
> >>>>>> + *            signaled
> >>>>>> + *
> >>>>>> + * Inform the fence signaler of an upcoming deadline, such as
> >>>>>> vblank, by
> >>>>>> + * which point the waiter would prefer the fence to be signaled
> >>>>>> by.  This
> >>>>>> + * is intended to give feedback to the fence signaler to aid in power
> >>>>>> + * management decisions, such as boosting GPU frequency if a periodic
> >>>>>> + * vblank deadline is approaching.
> >>>>>> + */
> >>>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
> >>>>>> deadline)
> >>>>>> +{
> >>>>>> +     unsigned long flags;
> >>>>>> +
> >>>>>> +     if (dma_fence_is_signaled(fence))
> >>>>>> +             return;
> >>>>>> +
> >>>>>> +     spin_lock_irqsave(fence->lock, flags);
> >>>>>> +
> >>>>>> +     /* If we already have an earlier deadline, keep it: */
> >>>>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags) &&
> >>>>>> +         ktime_before(fence->deadline, deadline)) {
> >>>>>> +             spin_unlock_irqrestore(fence->lock, flags);
> >>>>>> +             return;
> >>>>>> +     }
> >>>>>> +
> >>>>>> +     fence->deadline = deadline;
> >>>>>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags);
> >>>>>> +
> >>>>>> +     spin_unlock_irqrestore(fence->lock, flags);
> >>>>>> +
> >>>>>> +     if (fence->ops->set_deadline)
> >>>>>> +             fence->ops->set_deadline(fence, deadline);
> >>>>>> +}
> >>>>>> +EXPORT_SYMBOL(dma_fence_set_deadline);
> >>>>>> +
> >>>>>>     /**
> >>>>>>      * dma_fence_init - Initialize a custom fence.
> >>>>>>      * @fence: the fence to initialize
> >>>>>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> >>>>>> index 6ffb4b2c6371..4e6cfe4e6fbc 100644
> >>>>>> --- a/include/linux/dma-fence.h
> >>>>>> +++ b/include/linux/dma-fence.h
> >>>>>> @@ -88,6 +88,7 @@ struct dma_fence {
> >>>>>>                 /* @timestamp replaced by @rcu on
> >>>>>> dma_fence_release() */
> >>>>>>                 struct rcu_head rcu;
> >>>>>>         };
> >>>>>> +     ktime_t deadline;
> >>>>> Mhm, adding the flag sounds ok to me but I'm a bit hesitating adding
> >>>>> the
> >>>>> deadline as extra field here.
> >>>>>
> >>>>> We tuned the dma_fence structure intentionally so that it is only 64
> >>>>> bytes.
> >>>> Hmm, then I guess you wouldn't be a fan of also adding an hrtimer?
> >>>>
> >>>> We could push the ktime_t (and timer) down into the derived fence
> >>>> class, but I think there is going to need to be some extra storage
> >>>> *somewhere*.. maybe the fence signaler could get away with just
> >>>> storing the nearest upcoming deadline per fence-context instead?
> >>> I would just push that into the driver instead.
> >>>
> >>> You most likely don't want the deadline per fence anyway in complex
> >>> scenarios, but rather per frame. And a frame is usually composed from
> >>> multiple fences.
> > Right, I ended up keeping track of the nearest deadline in patch 5/4
> > which added drm/msm support:
> >
> >    https://patchwork.freedesktop.org/patch/447138/
> >
> > But if we do have the ktime_t in dma_fence in dma_fence, we can add
> > some checks and avoid calling back to the driver if a later deadline
> > is set on a fence that already has an earlier deadline.  OTOH I
> > suppose I can push all that back to the driver to start, and we can
> > revisit once we have more drivers implementing deadline support.
>
> I still think that all of this is rather specific to your use case and
> have strong doubt that anybody else will implement that.

i915 already has a similar thing in its hand-rolled atomic
commit path.  So I think msm won't be the only one.  It should also be
useful to the other mobile GPUs with a gpu vs kms driver split,
although looking at the other gpu devfreq implementations, I don't
think they've yet gotten to this point in the fine tuning.

BR,
-R

> >> Thinking more about it we could probably kill the spinlock pointer and
> >> make the flags 32bit if we absolutely need that here.
> > If we had a 'struct dma_fence_context' we could push the spinlock, ops
> > pointer, and u64 context into that and replace with a single
> > dma_fence_context ptr, fwiw
>
> That won't work. We have a lot of use cases where you can't allocate
> memory, but must allocate a context.
>
> Christian.
>
> >
> > BR,
> > -R
> >
> >> But I still don't see the need for that, especially since most drivers
> >> probably won't implement it.
> >>
> >> Regards,
> >> Christian.
> >>
> >>> Regards,
> >>> Christian.
> >>>
> >>>> BR,
> >>>> -R
> >>>>
> >>>>> Regards,
> >>>>> Christian.
> >>>>>
> >>>>>>         u64 context;
> >>>>>>         u64 seqno;
> >>>>>>         unsigned long flags;
> >>>>>> @@ -99,6 +100,7 @@ enum dma_fence_flag_bits {
> >>>>>>         DMA_FENCE_FLAG_SIGNALED_BIT,
> >>>>>>         DMA_FENCE_FLAG_TIMESTAMP_BIT,
> >>>>>>         DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> >>>>>> +     DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> >>>>>>         DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
> >>>>>>     };
> >>>>>>
> >>>>>> @@ -261,6 +263,19 @@ struct dma_fence_ops {
> >>>>>>          */
> >>>>>>         void (*timeline_value_str)(struct dma_fence *fence,
> >>>>>>                                    char *str, int size);
> >>>>>> +
> >>>>>> +     /**
> >>>>>> +      * @set_deadline:
> >>>>>> +      *
> >>>>>> +      * Callback to allow a fence waiter to inform the fence
> >>>>>> signaler of an
> >>>>>> +      * upcoming deadline, such as vblank, by which point the
> >>>>>> waiter would
> >>>>>> +      * prefer the fence to be signaled by.  This is intended to
> >>>>>> give feedback
> >>>>>> +      * to the fence signaler to aid in power management
> >>>>>> decisions, such as
> >>>>>> +      * boosting GPU frequency.
> >>>>>> +      *
> >>>>>> +      * This callback is optional.
> >>>>>> +      */
> >>>>>> +     void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
> >>>>>>     };
> >>>>>>
> >>>>>>     void dma_fence_init(struct dma_fence *fence, const struct
> >>>>>> dma_fence_ops *ops,
> >>>>>> @@ -586,6 +601,8 @@ static inline signed long dma_fence_wait(struct
> >>>>>> dma_fence *fence, bool intr)
> >>>>>>         return ret < 0 ? ret : 0;
> >>>>>>     }
> >>>>>>
> >>>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
> >>>>>> deadline);
> >>>>>> +
> >>>>>>     struct dma_fence *dma_fence_get_stub(void);
> >>>>>>     struct dma_fence *dma_fence_allocate_private_stub(void);
> >>>>>>     u64 dma_fence_context_alloc(unsigned num);
>
Daniel Vetter July 29, 2021, 7:03 a.m. UTC | #8
On Wed, Jul 28, 2021 at 10:58:51AM -0700, Rob Clark wrote:
> On Wed, Jul 28, 2021 at 10:23 AM Christian König
> <christian.koenig@amd.com> wrote:
> >
> >
> >
> > Am 28.07.21 um 17:15 schrieb Rob Clark:
> > > On Wed, Jul 28, 2021 at 4:37 AM Christian König
> > > <ckoenig.leichtzumerken@gmail.com> wrote:
> > >> Am 28.07.21 um 09:03 schrieb Christian König:
> > >>> Am 27.07.21 um 16:25 schrieb Rob Clark:
> > >>>> On Tue, Jul 27, 2021 at 12:11 AM Christian König
> > >>>> <ckoenig.leichtzumerken@gmail.com> wrote:
> > >>>>> Am 27.07.21 um 01:38 schrieb Rob Clark:
> > >>>>>> From: Rob Clark <robdclark@chromium.org>
> > >>>>>>
> > >>>>>> Add a way to hint to the fence signaler of an upcoming deadline,
> > >>>>>> such as
> > >>>>>> vblank, which the fence waiter would prefer not to miss. This is to
> > >>>>>> aid
> > >>>>>> the fence signaler in making power management decisions, like boosting
> > >>>>>> frequency as the deadline approaches and awareness of missing
> > >>>>>> deadlines
> > >>>>>> so that can be factored in to the frequency scaling.
> > >>>>>>
> > >>>>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
> > >>>>>> ---
> > >>>>>>     drivers/dma-buf/dma-fence.c | 39
> > >>>>>> +++++++++++++++++++++++++++++++++++++
> > >>>>>>     include/linux/dma-fence.h   | 17 ++++++++++++++++
> > >>>>>>     2 files changed, 56 insertions(+)
> > >>>>>>
> > >>>>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> > >>>>>> index ce0f5eff575d..2e0d25ab457e 100644
> > >>>>>> --- a/drivers/dma-buf/dma-fence.c
> > >>>>>> +++ b/drivers/dma-buf/dma-fence.c
> > >>>>>> @@ -910,6 +910,45 @@ dma_fence_wait_any_timeout(struct dma_fence
> > >>>>>> **fences, uint32_t count,
> > >>>>>>     }
> > >>>>>>     EXPORT_SYMBOL(dma_fence_wait_any_timeout);
> > >>>>>>
> > >>>>>> +
> > >>>>>> +/**
> > >>>>>> + * dma_fence_set_deadline - set desired fence-wait deadline
> > >>>>>> + * @fence:    the fence that is to be waited on
> > >>>>>> + * @deadline: the time by which the waiter hopes for the fence to be
> > >>>>>> + *            signaled
> > >>>>>> + *
> > >>>>>> + * Inform the fence signaler of an upcoming deadline, such as
> > >>>>>> vblank, by
> > >>>>>> + * which point the waiter would prefer the fence to be signaled
> > >>>>>> by.  This
> > >>>>>> + * is intended to give feedback to the fence signaler to aid in power
> > >>>>>> + * management decisions, such as boosting GPU frequency if a periodic
> > >>>>>> + * vblank deadline is approaching.
> > >>>>>> + */
> > >>>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
> > >>>>>> deadline)
> > >>>>>> +{
> > >>>>>> +     unsigned long flags;
> > >>>>>> +
> > >>>>>> +     if (dma_fence_is_signaled(fence))
> > >>>>>> +             return;
> > >>>>>> +
> > >>>>>> +     spin_lock_irqsave(fence->lock, flags);
> > >>>>>> +
> > >>>>>> +     /* If we already have an earlier deadline, keep it: */
> > >>>>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags) &&
> > >>>>>> +         ktime_before(fence->deadline, deadline)) {
> > >>>>>> +             spin_unlock_irqrestore(fence->lock, flags);
> > >>>>>> +             return;
> > >>>>>> +     }
> > >>>>>> +
> > >>>>>> +     fence->deadline = deadline;
> > >>>>>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags);
> > >>>>>> +
> > >>>>>> +     spin_unlock_irqrestore(fence->lock, flags);
> > >>>>>> +
> > >>>>>> +     if (fence->ops->set_deadline)
> > >>>>>> +             fence->ops->set_deadline(fence, deadline);
> > >>>>>> +}
> > >>>>>> +EXPORT_SYMBOL(dma_fence_set_deadline);
> > >>>>>> +
> > >>>>>>     /**
> > >>>>>>      * dma_fence_init - Initialize a custom fence.
> > >>>>>>      * @fence: the fence to initialize
> > >>>>>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> > >>>>>> index 6ffb4b2c6371..4e6cfe4e6fbc 100644
> > >>>>>> --- a/include/linux/dma-fence.h
> > >>>>>> +++ b/include/linux/dma-fence.h
> > >>>>>> @@ -88,6 +88,7 @@ struct dma_fence {
> > >>>>>>                 /* @timestamp replaced by @rcu on
> > >>>>>> dma_fence_release() */
> > >>>>>>                 struct rcu_head rcu;
> > >>>>>>         };
> > >>>>>> +     ktime_t deadline;
> > >>>>> Mhm, adding the flag sounds ok to me but I'm a bit hesitating adding
> > >>>>> the
> > >>>>> deadline as extra field here.
> > >>>>>
> > >>>>> We tuned the dma_fence structure intentionally so that it is only 64
> > >>>>> bytes.
> > >>>> Hmm, then I guess you wouldn't be a fan of also adding an hrtimer?
> > >>>>
> > >>>> We could push the ktime_t (and timer) down into the derived fence
> > >>>> class, but I think there is going to need to be some extra storage
> > >>>> *somewhere*.. maybe the fence signaler could get away with just
> > >>>> storing the nearest upcoming deadline per fence-context instead?
> > >>> I would just push that into the driver instead.
> > >>>
> > >>> You most likely don't want the deadline per fence anyway in complex
> > >>> scenarios, but rather per frame. And a frame is usually composed from
> > >>> multiple fences.
> > > Right, I ended up keeping track of the nearest deadline in patch 5/4
> > > which added drm/msm support:
> > >
> > >    https://patchwork.freedesktop.org/patch/447138/
> > >
> > > But if we do have the ktime_t in dma_fence in dma_fence, we can add
> > > some checks and avoid calling back to the driver if a later deadline
> > > is set on a fence that already has an earlier deadline.  OTOH I
> > > suppose I can push all that back to the driver to start, and we can
> > > revisit once we have more drivers implementing deadline support.
> >
> > I still think that all of this is rather specific to your use case and
> > have strong doubt that anybody else will implement that.
> 
> i915 does already have a similar thing in it's hand-rolled atomic
> commit path.  So I think msm won't be the only one.  It should be also
> useful to the other mobile GPUs with a gpu vs kms driver split,
> although looking at the other gpu devfreq implementations, I don't
> think they've yet gotten to this point in the fine tuning..

Yeah I have a dream that maybe i915 will use the atomic commit helpers; I
originally wrote them with i915 in mind :-) and even had patches!

I also think we'll need this eventually in other areas; Android also has
some hacks like this to make sure idle->first touch doesn't suck, and
similar things.
-Daniel

> 
> BR,
> -R
> 
> > >> Thinking more about it we could probably kill the spinlock pointer and
> > >> make the flags 32bit if we absolutely need that here.
> > > If we had a 'struct dma_fence_context' we could push the spinlock, ops
> > > pointer, and u64 context into that and replace with a single
> > > dma_fence_context ptr, fwiw
> >
> > That won't work. We have a lot of use cases where you can't allocate
> > memory, but must allocate a context.
> >
> > Christian.
> >
> > >
> > > BR,
> > > -R
> > >
> > >> But I still don't see the need for that, especially since most drivers
> > >> probably won't implement it.
> > >>
> > >> Regards,
> > >> Christian.
> > >>
> > >>> Regards,
> > >>> Christian.
> > >>>
> > >>>> BR,
> > >>>> -R
> > >>>>
> > >>>>> Regards,
> > >>>>> Christian.
> > >>>>>
> > >>>>>>         u64 context;
> > >>>>>>         u64 seqno;
> > >>>>>>         unsigned long flags;
> > >>>>>> @@ -99,6 +100,7 @@ enum dma_fence_flag_bits {
> > >>>>>>         DMA_FENCE_FLAG_SIGNALED_BIT,
> > >>>>>>         DMA_FENCE_FLAG_TIMESTAMP_BIT,
> > >>>>>>         DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> > >>>>>> +     DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> > >>>>>>         DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
> > >>>>>>     };
> > >>>>>>
> > >>>>>> @@ -261,6 +263,19 @@ struct dma_fence_ops {
> > >>>>>>          */
> > >>>>>>         void (*timeline_value_str)(struct dma_fence *fence,
> > >>>>>>                                    char *str, int size);
> > >>>>>> +
> > >>>>>> +     /**
> > >>>>>> +      * @set_deadline:
> > >>>>>> +      *
> > >>>>>> +      * Callback to allow a fence waiter to inform the fence
> > >>>>>> signaler of an
> > >>>>>> +      * upcoming deadline, such as vblank, by which point the
> > >>>>>> waiter would
> > >>>>>> +      * prefer the fence to be signaled by.  This is intended to
> > >>>>>> give feedback
> > >>>>>> +      * to the fence signaler to aid in power management
> > >>>>>> decisions, such as
> > >>>>>> +      * boosting GPU frequency.
> > >>>>>> +      *
> > >>>>>> +      * This callback is optional.
> > >>>>>> +      */
> > >>>>>> +     void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
> > >>>>>>     };
> > >>>>>>
> > >>>>>>     void dma_fence_init(struct dma_fence *fence, const struct
> > >>>>>> dma_fence_ops *ops,
> > >>>>>> @@ -586,6 +601,8 @@ static inline signed long dma_fence_wait(struct
> > >>>>>> dma_fence *fence, bool intr)
> > >>>>>>         return ret < 0 ? ret : 0;
> > >>>>>>     }
> > >>>>>>
> > >>>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
> > >>>>>> deadline);
> > >>>>>> +
> > >>>>>>     struct dma_fence *dma_fence_get_stub(void);
> > >>>>>>     struct dma_fence *dma_fence_allocate_private_stub(void);
> > >>>>>>     u64 dma_fence_context_alloc(unsigned num);
> >
Rob Clark July 29, 2021, 3:23 p.m. UTC | #9
On Thu, Jul 29, 2021 at 12:03 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Wed, Jul 28, 2021 at 10:58:51AM -0700, Rob Clark wrote:
> > On Wed, Jul 28, 2021 at 10:23 AM Christian König
> > <christian.koenig@amd.com> wrote:
> > >
> > >
> > >
> > > Am 28.07.21 um 17:15 schrieb Rob Clark:
> > > > On Wed, Jul 28, 2021 at 4:37 AM Christian König
> > > > <ckoenig.leichtzumerken@gmail.com> wrote:
> > > >> Am 28.07.21 um 09:03 schrieb Christian König:
> > > >>> Am 27.07.21 um 16:25 schrieb Rob Clark:
> > > >>>> On Tue, Jul 27, 2021 at 12:11 AM Christian König
> > > >>>> <ckoenig.leichtzumerken@gmail.com> wrote:
> > > >>>>> Am 27.07.21 um 01:38 schrieb Rob Clark:
> > > >>>>>> From: Rob Clark <robdclark@chromium.org>
> > > >>>>>>
> > > >>>>>> Add a way to hint to the fence signaler of an upcoming deadline,
> > > >>>>>> such as
> > > >>>>>> vblank, which the fence waiter would prefer not to miss. This is to
> > > >>>>>> aid
> > > >>>>>> the fence signaler in making power management decisions, like boosting
> > > >>>>>> frequency as the deadline approaches and awareness of missing
> > > >>>>>> deadlines
> > > >>>>>> so that can be factored in to the frequency scaling.
> > > >>>>>>
> > > >>>>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > >>>>>> ---
> > > >>>>>>     drivers/dma-buf/dma-fence.c | 39
> > > >>>>>> +++++++++++++++++++++++++++++++++++++
> > > >>>>>>     include/linux/dma-fence.h   | 17 ++++++++++++++++
> > > >>>>>>     2 files changed, 56 insertions(+)
> > > >>>>>>
> > > >>>>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> > > >>>>>> index ce0f5eff575d..2e0d25ab457e 100644
> > > >>>>>> --- a/drivers/dma-buf/dma-fence.c
> > > >>>>>> +++ b/drivers/dma-buf/dma-fence.c
> > > >>>>>> @@ -910,6 +910,45 @@ dma_fence_wait_any_timeout(struct dma_fence
> > > >>>>>> **fences, uint32_t count,
> > > >>>>>>     }
> > > >>>>>>     EXPORT_SYMBOL(dma_fence_wait_any_timeout);
> > > >>>>>>
> > > >>>>>> +
> > > >>>>>> +/**
> > > >>>>>> + * dma_fence_set_deadline - set desired fence-wait deadline
> > > >>>>>> + * @fence:    the fence that is to be waited on
> > > >>>>>> + * @deadline: the time by which the waiter hopes for the fence to be
> > > >>>>>> + *            signaled
> > > >>>>>> + *
> > > >>>>>> + * Inform the fence signaler of an upcoming deadline, such as
> > > >>>>>> vblank, by
> > > >>>>>> + * which point the waiter would prefer the fence to be signaled
> > > >>>>>> by.  This
> > > >>>>>> + * is intended to give feedback to the fence signaler to aid in power
> > > >>>>>> + * management decisions, such as boosting GPU frequency if a periodic
> > > >>>>>> + * vblank deadline is approaching.
> > > >>>>>> + */
> > > >>>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
> > > >>>>>> deadline)
> > > >>>>>> +{
> > > >>>>>> +     unsigned long flags;
> > > >>>>>> +
> > > >>>>>> +     if (dma_fence_is_signaled(fence))
> > > >>>>>> +             return;
> > > >>>>>> +
> > > >>>>>> +     spin_lock_irqsave(fence->lock, flags);
> > > >>>>>> +
> > > >>>>>> +     /* If we already have an earlier deadline, keep it: */
> > > >>>>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags) &&
> > > >>>>>> +         ktime_before(fence->deadline, deadline)) {
> > > >>>>>> +             spin_unlock_irqrestore(fence->lock, flags);
> > > >>>>>> +             return;
> > > >>>>>> +     }
> > > >>>>>> +
> > > >>>>>> +     fence->deadline = deadline;
> > > >>>>>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags);
> > > >>>>>> +
> > > >>>>>> +     spin_unlock_irqrestore(fence->lock, flags);
> > > >>>>>> +
> > > >>>>>> +     if (fence->ops->set_deadline)
> > > >>>>>> +             fence->ops->set_deadline(fence, deadline);
> > > >>>>>> +}
> > > >>>>>> +EXPORT_SYMBOL(dma_fence_set_deadline);
> > > >>>>>> +
> > > >>>>>>     /**
> > > >>>>>>      * dma_fence_init - Initialize a custom fence.
> > > >>>>>>      * @fence: the fence to initialize
> > > >>>>>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> > > >>>>>> index 6ffb4b2c6371..4e6cfe4e6fbc 100644
> > > >>>>>> --- a/include/linux/dma-fence.h
> > > >>>>>> +++ b/include/linux/dma-fence.h
> > > >>>>>> @@ -88,6 +88,7 @@ struct dma_fence {
> > > >>>>>>                 /* @timestamp replaced by @rcu on
> > > >>>>>> dma_fence_release() */
> > > >>>>>>                 struct rcu_head rcu;
> > > >>>>>>         };
> > > >>>>>> +     ktime_t deadline;
> > > >>>>> Mhm, adding the flag sounds ok to me but I'm a bit hesitating adding
> > > >>>>> the
> > > >>>>> deadline as extra field here.
> > > >>>>>
> > > >>>>> We tuned the dma_fence structure intentionally so that it is only 64
> > > >>>>> bytes.
> > > >>>> Hmm, then I guess you wouldn't be a fan of also adding an hrtimer?
> > > >>>>
> > > >>>> We could push the ktime_t (and timer) down into the derived fence
> > > >>>> class, but I think there is going to need to be some extra storage
> > > >>>> *somewhere*.. maybe the fence signaler could get away with just
> > > >>>> storing the nearest upcoming deadline per fence-context instead?
> > > >>> I would just push that into the driver instead.
> > > >>>
> > > >>> You most likely don't want the deadline per fence anyway in complex
> > > >>> scenarios, but rather per frame. And a frame is usually composed from
> > > >>> multiple fences.
> > > > Right, I ended up keeping track of the nearest deadline in patch 5/4
> > > > which added drm/msm support:
> > > >
> > > >    https://patchwork.freedesktop.org/patch/447138/
> > > >
> > > > But if we do have the ktime_t in dma_fence, we can add
> > > > some checks and avoid calling back to the driver if a later deadline
> > > > is set on a fence that already has an earlier deadline.  OTOH I
> > > > suppose I can push all that back to the driver to start, and we can
> > > > revisit once we have more drivers implementing deadline support.
> > >
> > > I still think that all of this is rather specific to your use case and
> > > have strong doubt that anybody else will implement that.
> >
> > i915 does already have a similar thing in its hand-rolled atomic
> > commit path.  So I think msm won't be the only one.  It should be also
> > useful to the other mobile GPUs with a gpu vs kms driver split,
> > although looking at the other gpu devfreq implementations, I don't
> > think they've yet gotten to this point in the fine tuning..
>
> Yeah I have a dream that maybe i915 will use the atomic commit helpers, I
> originally wrote them with i915 in mind :-) even had patches!
>
> I also think we'll need this eventually in other areas, Android also has
> some hacks like this to make sure idle->first touch doesn't suck and
> similar things.

input-boost is another thing I have on my roadmap.. part of the solution is:

    commit 9bc95570175a7fbca29d86d22c54bbf399f4ad5a
    Author:     Rob Clark <robdclark@chromium.org>
    AuthorDate: Mon Jul 26 07:46:50 2021 -0700
    Commit:     Rob Clark <robdclark@chromium.org>
    CommitDate: Tue Jul 27 17:54:36 2021 -0700

        drm/msm: Devfreq tuning

which gives the freq a bit of a nudge if the GPU has been idle for
longer than a certain threshold.
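
For context, roughly the shape of that idle-nudge heuristic (a
simplified sketch only; the struct, helper and threshold names below
are made up for illustration and are not taken from the actual drm/msm
commit):

struct example_gpu_devfreq {
	ktime_t idle_time;         /* when the GPU last went idle */
	unsigned long boost_freq;  /* min freq to request after a long idle */
};

/* called when a new submit makes the GPU active again */
static void example_devfreq_active(struct example_gpu_devfreq *df)
{
	s64 idle_ms = ktime_to_ms(ktime_sub(ktime_get(), df->idle_time));

	/*
	 * If the GPU sat idle long enough for devfreq to have ramped the
	 * clocks all the way down, bump the minimum frequency so the first
	 * frame after the idle period isn't rendered at the lowest OPP.
	 * The real mechanism would be a devfreq / PM QoS min-freq request.
	 */
	if (idle_ms > 50 /* illustrative threshold, in ms */)
		example_request_min_freq(df, df->boost_freq);
}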

But the other part is that if the GPU has been idle for more than 66ms
(the typical autosuspend delay for adreno) it will suspend.  For modern
adrenos it takes ~2ms to "boot up" the GPU from suspend, which is
something you want to take out of the submit/execbuf path if you are
trying to reduce input-to-pageflip latency.

We have a downstream patch that boosts the CPUs on input events (with
a cooldown period to prevent spacebar-heater) and I have been thinking
of something along those lines to trigger resuming the GPU.  It is
straightforward enough for touch based devices, but gets more
complicated with keyboard input.  In particular, some keys should
trigger the boost on key-release rather than key-press: for modifier
keys (ctrl/shift/alt/etc.. the "search" key on chromebooks, etc) you
want to boost on key-release, not on key-press, because unless you
type *really* fast you'll already be in the cooldown period by the
time the key-release event happens.  Unfortunately the kernel doesn't
really know this "policy" sort of information about which keys should
boost on press vs release.  So I think the long-term/upstream solution
is to do input-boost in userspace; sysfs already has all the knobs
that a userspace input-boost daemon would need to twiddle, so there is
no real need for this to be in the kernel.  I guess the only drawback
is that the sysfs knobs are a bit less standardized on the "desktop
GPUs" which don't use devfreq.
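
To make that concrete, a minimal sketch of what such a userspace
input-boost daemon could look like (the device paths, frequency value
and key policy are assumptions for illustration; devfreq's min_freq
sysfs attribute is the knob being twiddled):

#include <fcntl.h>
#include <time.h>
#include <unistd.h>
#include <linux/input.h>

#define COOLDOWN_NS 500000000LL   /* 500ms between boosts */

static void boost_gpu(void)
{
	/* example device path; a real daemon would discover it */
	int fd = open("/sys/class/devfreq/3d00000.gpu/min_freq", O_WRONLY);

	if (fd >= 0) {
		/* raise the floor; a real daemon would drop it again later */
		write(fd, "500000000", 9);
		close(fd);
	}
}

int main(void)
{
	struct input_event ev;
	struct timespec ts;
	long long now, last = 0;
	int fd = open("/dev/input/event0", O_RDONLY);  /* example device */

	if (fd < 0)
		return 1;

	while (read(fd, &ev, sizeof(ev)) == sizeof(ev)) {
		/* boost modifier keys on release, everything else on press */
		int is_mod = ev.code == KEY_LEFTCTRL || ev.code == KEY_LEFTSHIFT ||
			     ev.code == KEY_LEFTALT || ev.code == KEY_LEFTMETA;

		if (ev.type != EV_KEY || ev.value == 2)  /* skip autorepeat */
			continue;
		if ((is_mod && ev.value != 0) || (!is_mod && ev.value != 1))
			continue;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		now = ts.tv_sec * 1000000000LL + ts.tv_nsec;
		if (now - last < COOLDOWN_NS)
			continue;
		last = now;
		boost_gpu();
	}

	return 0;
}

All of the press-vs-release policy lives in userspace here, which is
the point of pushing it out of the kernel.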

BR,
-R

> -Daniel
>
> >
> > BR,
> > -R
> >
> > > >> Thinking more about it we could probably kill the spinlock pointer and
> > > >> make the flags 32bit if we absolutely need that here.
> > > > If we had a 'struct dma_fence_context' we could push the spinlock, ops
> > > > pointer, and u64 context into that and replace with a single
> > > > dma_fence_context ptr, fwiw
> > >
> > > That won't work. We have a lot of use cases where you can't allocate
> > > memory, but must allocate a context.
> > >
> > > Christian.
> > >
> > > >
> > > > BR,
> > > > -R
> > > >
> > > >> But I still don't see the need for that, especially since most drivers
> > > >> probably won't implement it.
> > > >>
> > > >> Regards,
> > > >> Christian.
> > > >>
> > > >>> Regards,
> > > >>> Christian.
> > > >>>
> > > >>>> BR,
> > > >>>> -R
> > > >>>>
> > > >>>>> Regards,
> > > >>>>> Christian.
> > > >>>>>
> > > >>>>>>         u64 context;
> > > >>>>>>         u64 seqno;
> > > >>>>>>         unsigned long flags;
> > > >>>>>> @@ -99,6 +100,7 @@ enum dma_fence_flag_bits {
> > > >>>>>>         DMA_FENCE_FLAG_SIGNALED_BIT,
> > > >>>>>>         DMA_FENCE_FLAG_TIMESTAMP_BIT,
> > > >>>>>>         DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> > > >>>>>> +     DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> > > >>>>>>         DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
> > > >>>>>>     };
> > > >>>>>>
> > > >>>>>> @@ -261,6 +263,19 @@ struct dma_fence_ops {
> > > >>>>>>          */
> > > >>>>>>         void (*timeline_value_str)(struct dma_fence *fence,
> > > >>>>>>                                    char *str, int size);
> > > >>>>>> +
> > > >>>>>> +     /**
> > > >>>>>> +      * @set_deadline:
> > > >>>>>> +      *
> > > >>>>>> +      * Callback to allow a fence waiter to inform the fence
> > > >>>>>> signaler of an
> > > >>>>>> +      * upcoming deadline, such as vblank, by which point the
> > > >>>>>> waiter would
> > > >>>>>> +      * prefer the fence to be signaled by.  This is intended to
> > > >>>>>> give feedback
> > > >>>>>> +      * to the fence signaler to aid in power management
> > > >>>>>> decisions, such as
> > > >>>>>> +      * boosting GPU frequency.
> > > >>>>>> +      *
> > > >>>>>> +      * This callback is optional.
> > > >>>>>> +      */
> > > >>>>>> +     void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
> > > >>>>>>     };
> > > >>>>>>
> > > >>>>>>     void dma_fence_init(struct dma_fence *fence, const struct
> > > >>>>>> dma_fence_ops *ops,
> > > >>>>>> @@ -586,6 +601,8 @@ static inline signed long dma_fence_wait(struct
> > > >>>>>> dma_fence *fence, bool intr)
> > > >>>>>>         return ret < 0 ? ret : 0;
> > > >>>>>>     }
> > > >>>>>>
> > > >>>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
> > > >>>>>> deadline);
> > > >>>>>> +
> > > >>>>>>     struct dma_fence *dma_fence_get_stub(void);
> > > >>>>>>     struct dma_fence *dma_fence_allocate_private_stub(void);
> > > >>>>>>     u64 dma_fence_context_alloc(unsigned num);
> > >
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Daniel Vetter July 29, 2021, 4:18 p.m. UTC | #10
On Thu, Jul 29, 2021 at 5:19 PM Rob Clark <robdclark@gmail.com> wrote:
>
> On Thu, Jul 29, 2021 at 12:03 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> >
> > On Wed, Jul 28, 2021 at 10:58:51AM -0700, Rob Clark wrote:
> > > On Wed, Jul 28, 2021 at 10:23 AM Christian König
> > > <christian.koenig@amd.com> wrote:
> > > >
> > > >
> > > >
> > > > Am 28.07.21 um 17:15 schrieb Rob Clark:
> > > > > On Wed, Jul 28, 2021 at 4:37 AM Christian König
> > > > > <ckoenig.leichtzumerken@gmail.com> wrote:
> > > > >> Am 28.07.21 um 09:03 schrieb Christian König:
> > > > >>> Am 27.07.21 um 16:25 schrieb Rob Clark:
> > > > >>>> On Tue, Jul 27, 2021 at 12:11 AM Christian König
> > > > >>>> <ckoenig.leichtzumerken@gmail.com> wrote:
> > > > >>>>> Am 27.07.21 um 01:38 schrieb Rob Clark:
> > > > >>>>>> From: Rob Clark <robdclark@chromium.org>
> > > > >>>>>>
> > > > >>>>>> Add a way to hint to the fence signaler of an upcoming deadline,
> > > > >>>>>> such as
> > > > >>>>>> vblank, which the fence waiter would prefer not to miss. This is to
> > > > >>>>>> aid
> > > > >>>>>> the fence signaler in making power management decisions, like boosting
> > > > >>>>>> frequency as the deadline approaches and awareness of missing
> > > > >>>>>> deadlines
> > > > >>>>>> so that can be factored in to the frequency scaling.
> > > > >>>>>>
> > > > >>>>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > > >>>>>> ---
> > > > >>>>>>     drivers/dma-buf/dma-fence.c | 39
> > > > >>>>>> +++++++++++++++++++++++++++++++++++++
> > > > >>>>>>     include/linux/dma-fence.h   | 17 ++++++++++++++++
> > > > >>>>>>     2 files changed, 56 insertions(+)
> > > > >>>>>>
> > > > >>>>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> > > > >>>>>> index ce0f5eff575d..2e0d25ab457e 100644
> > > > >>>>>> --- a/drivers/dma-buf/dma-fence.c
> > > > >>>>>> +++ b/drivers/dma-buf/dma-fence.c
> > > > >>>>>> @@ -910,6 +910,45 @@ dma_fence_wait_any_timeout(struct dma_fence
> > > > >>>>>> **fences, uint32_t count,
> > > > >>>>>>     }
> > > > >>>>>>     EXPORT_SYMBOL(dma_fence_wait_any_timeout);
> > > > >>>>>>
> > > > >>>>>> +
> > > > >>>>>> +/**
> > > > >>>>>> + * dma_fence_set_deadline - set desired fence-wait deadline
> > > > >>>>>> + * @fence:    the fence that is to be waited on
> > > > >>>>>> + * @deadline: the time by which the waiter hopes for the fence to be
> > > > >>>>>> + *            signaled
> > > > >>>>>> + *
> > > > >>>>>> + * Inform the fence signaler of an upcoming deadline, such as
> > > > >>>>>> vblank, by
> > > > >>>>>> + * which point the waiter would prefer the fence to be signaled
> > > > >>>>>> by.  This
> > > > >>>>>> + * is intended to give feedback to the fence signaler to aid in power
> > > > >>>>>> + * management decisions, such as boosting GPU frequency if a periodic
> > > > >>>>>> + * vblank deadline is approaching.
> > > > >>>>>> + */
> > > > >>>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
> > > > >>>>>> deadline)
> > > > >>>>>> +{
> > > > >>>>>> +     unsigned long flags;
> > > > >>>>>> +
> > > > >>>>>> +     if (dma_fence_is_signaled(fence))
> > > > >>>>>> +             return;
> > > > >>>>>> +
> > > > >>>>>> +     spin_lock_irqsave(fence->lock, flags);
> > > > >>>>>> +
> > > > >>>>>> +     /* If we already have an earlier deadline, keep it: */
> > > > >>>>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags) &&
> > > > >>>>>> +         ktime_before(fence->deadline, deadline)) {
> > > > >>>>>> +             spin_unlock_irqrestore(fence->lock, flags);
> > > > >>>>>> +             return;
> > > > >>>>>> +     }
> > > > >>>>>> +
> > > > >>>>>> +     fence->deadline = deadline;
> > > > >>>>>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags);
> > > > >>>>>> +
> > > > >>>>>> +     spin_unlock_irqrestore(fence->lock, flags);
> > > > >>>>>> +
> > > > >>>>>> +     if (fence->ops->set_deadline)
> > > > >>>>>> +             fence->ops->set_deadline(fence, deadline);
> > > > >>>>>> +}
> > > > >>>>>> +EXPORT_SYMBOL(dma_fence_set_deadline);
> > > > >>>>>> +
> > > > >>>>>>     /**
> > > > >>>>>>      * dma_fence_init - Initialize a custom fence.
> > > > >>>>>>      * @fence: the fence to initialize
> > > > >>>>>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> > > > >>>>>> index 6ffb4b2c6371..4e6cfe4e6fbc 100644
> > > > >>>>>> --- a/include/linux/dma-fence.h
> > > > >>>>>> +++ b/include/linux/dma-fence.h
> > > > >>>>>> @@ -88,6 +88,7 @@ struct dma_fence {
> > > > >>>>>>                 /* @timestamp replaced by @rcu on
> > > > >>>>>> dma_fence_release() */
> > > > >>>>>>                 struct rcu_head rcu;
> > > > >>>>>>         };
> > > > >>>>>> +     ktime_t deadline;
> > > > >>>>> Mhm, adding the flag sounds ok to me but I'm a bit hesitating adding
> > > > >>>>> the
> > > > >>>>> deadline as extra field here.
> > > > >>>>>
> > > > >>>>> We tuned the dma_fence structure intentionally so that it is only 64
> > > > >>>>> bytes.
> > > > >>>> Hmm, then I guess you wouldn't be a fan of also adding an hrtimer?
> > > > >>>>
> > > > >>>> We could push the ktime_t (and timer) down into the derived fence
> > > > >>>> class, but I think there is going to need to be some extra storage
> > > > >>>> *somewhere*.. maybe the fence signaler could get away with just
> > > > >>>> storing the nearest upcoming deadline per fence-context instead?
> > > > >>> I would just push that into the driver instead.
> > > > >>>
> > > > >>> You most likely don't want the deadline per fence anyway in complex
> > > > >>> scenarios, but rather per frame. And a frame is usually composed from
> > > > >>> multiple fences.
> > > > > Right, I ended up keeping track of the nearest deadline in patch 5/4
> > > > > which added drm/msm support:
> > > > >
> > > > >    https://patchwork.freedesktop.org/patch/447138/
> > > > >
> > > > > But if we do have the ktime_t in dma_fence, we can add
> > > > > some checks and avoid calling back to the driver if a later deadline
> > > > > is set on a fence that already has an earlier deadline.  OTOH I
> > > > > suppose I can push all that back to the driver to start, and we can
> > > > > revisit once we have more drivers implementing deadline support.
> > > >
> > > > I still think that all of this is rather specific to your use case and
> > > > have strong doubt that anybody else will implement that.
> > >
> > > i915 does already have a similar thing in its hand-rolled atomic
> > > commit path.  So I think msm won't be the only one.  It should be also
> > > useful to the other mobile GPUs with a gpu vs kms driver split,
> > > although looking at the other gpu devfreq implementations, I don't
> > > think they've yet gotten to this point in the fine tuning..
> >
> > Yeah I have a dream that maybe i915 will use the atomic commit helpers, I
> > originally wrote them with i915 in mind :-) even had patches!
> >
> > I also think we'll need this eventually in other areas, Android also has
> > some hacks like this to make sure idle->first touch doesn't suck and
> > similar things.
>
> input-boost is another thing I have on my roadmap.. part of the solution is:
>
>     commit 9bc95570175a7fbca29d86d22c54bbf399f4ad5a
>     Author:     Rob Clark <robdclark@chromium.org>
>     AuthorDate: Mon Jul 26 07:46:50 2021 -0700
>     Commit:     Rob Clark <robdclark@chromium.org>
>     CommitDate: Tue Jul 27 17:54:36 2021 -0700
>
>         drm/msm: Devfreq tuning
>
> which gives the freq a bit of a nudge if the GPU has been idle for
> longer than a certain threshold.
>
> But the other part is that if the GPU has been idle for more than 66ms
> (typical autosuspend delay for adreno) it will suspend.  For modern
> adreno's it takes ~2ms to "boot up" the GPU from suspend.  Which is
> something you want to take out of the submit/execbuf path if you are
> trying to reduce input-to-pageflip latency.
>
> We have a downstream patch that boosts the CPUs on input events (with
> a cooldown period to prevent spacebar-heater) and I have been thinking
> of something along those lines to trigger resuming the GPU.. it is
> straightforward enough for touch based devices, but gets more
> complicated with keyboard input.  In particular, some keys you want to
> trigger boost on key-release.  Ie. modifier keys (ctrl/shift/alt/etc..
> the "search" key on chromebooks, etc) you want to boost on
> key-release, not on key-press because unless you type *really* fast
> you'll be in the cooldown period when the key-release event happens.
> Unfortunately the kernel doesn't really know this "policy" sort of
> information about which keys should boost on press vs release.  So I
> think the long-term/upstream solution is to do input-boost in
> userspace.. sysfs already has all the knobs that a userspace
> input-boost daemon would need to twiddle, so no real need for this to
> be in the kernel.  I guess the only drawback is the sysfs knobs are a
> bit less standardized on the "desktop GPUs" which don't use devfreq.

I think we could do a standard interface for this, either on the drm
owner/master or somewhere in sysfs. Essentially "I expect to use the
gpu for the very next frame, get it going". Across all hw there's a
lot of things we can do. I think abuse is pretty easy to prevent with
a cooldown or similar.
-Daniel
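
Purely to make that idea concrete, a hypothetical shape for such a
"get the GPU going" nudge from userspace (no such ioctl exists today;
the name, number and semantics below are invented for illustration):

#include <xf86drm.h>

/* hypothetical: a driver-agnostic "I'm about to render" hint */
#define DRM_IOCTL_EXAMPLE_BOOST  DRM_IO(0x50)

static void hint_imminent_rendering(int drm_fd)
{
	/* the kernel side would enforce its own cooldown to prevent abuse */
	drmIoctl(drm_fd, DRM_IOCTL_EXAMPLE_BOOST, NULL);
}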

>
> BR,
> -R
>
> > -Daniel
> >
> > >
> > > BR,
> > > -R
> > >
> > > > >> Thinking more about it we could probably kill the spinlock pointer and
> > > > >> make the flags 32bit if we absolutely need that here.
> > > > > If we had a 'struct dma_fence_context' we could push the spinlock, ops
> > > > > pointer, and u64 context into that and replace with a single
> > > > > dma_fence_context ptr, fwiw
> > > >
> > > > That won't work. We have a lot of use cases where you can't allocate
> > > > memory, but must allocate a context.
> > > >
> > > > Christian.
> > > >
> > > > >
> > > > > BR,
> > > > > -R
> > > > >
> > > > >> But I still don't see the need for that, especially since most drivers
> > > > >> probably won't implement it.
> > > > >>
> > > > >> Regards,
> > > > >> Christian.
> > > > >>
> > > > >>> Regards,
> > > > >>> Christian.
> > > > >>>
> > > > >>>> BR,
> > > > >>>> -R
> > > > >>>>
> > > > >>>>> Regards,
> > > > >>>>> Christian.
> > > > >>>>>
> > > > >>>>>>         u64 context;
> > > > >>>>>>         u64 seqno;
> > > > >>>>>>         unsigned long flags;
> > > > >>>>>> @@ -99,6 +100,7 @@ enum dma_fence_flag_bits {
> > > > >>>>>>         DMA_FENCE_FLAG_SIGNALED_BIT,
> > > > >>>>>>         DMA_FENCE_FLAG_TIMESTAMP_BIT,
> > > > >>>>>>         DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> > > > >>>>>> +     DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> > > > >>>>>>         DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
> > > > >>>>>>     };
> > > > >>>>>>
> > > > >>>>>> @@ -261,6 +263,19 @@ struct dma_fence_ops {
> > > > >>>>>>          */
> > > > >>>>>>         void (*timeline_value_str)(struct dma_fence *fence,
> > > > >>>>>>                                    char *str, int size);
> > > > >>>>>> +
> > > > >>>>>> +     /**
> > > > >>>>>> +      * @set_deadline:
> > > > >>>>>> +      *
> > > > >>>>>> +      * Callback to allow a fence waiter to inform the fence
> > > > >>>>>> signaler of an
> > > > >>>>>> +      * upcoming deadline, such as vblank, by which point the
> > > > >>>>>> waiter would
> > > > >>>>>> +      * prefer the fence to be signaled by.  This is intended to
> > > > >>>>>> give feedback
> > > > >>>>>> +      * to the fence signaler to aid in power management
> > > > >>>>>> decisions, such as
> > > > >>>>>> +      * boosting GPU frequency.
> > > > >>>>>> +      *
> > > > >>>>>> +      * This callback is optional.
> > > > >>>>>> +      */
> > > > >>>>>> +     void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
> > > > >>>>>>     };
> > > > >>>>>>
> > > > >>>>>>     void dma_fence_init(struct dma_fence *fence, const struct
> > > > >>>>>> dma_fence_ops *ops,
> > > > >>>>>> @@ -586,6 +601,8 @@ static inline signed long dma_fence_wait(struct
> > > > >>>>>> dma_fence *fence, bool intr)
> > > > >>>>>>         return ret < 0 ? ret : 0;
> > > > >>>>>>     }
> > > > >>>>>>
> > > > >>>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
> > > > >>>>>> deadline);
> > > > >>>>>> +
> > > > >>>>>>     struct dma_fence *dma_fence_get_stub(void);
> > > > >>>>>>     struct dma_fence *dma_fence_allocate_private_stub(void);
> > > > >>>>>>     u64 dma_fence_context_alloc(unsigned num);
> > > >
> >
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch
Rob Clark July 29, 2021, 5:32 p.m. UTC | #11
On Thu, Jul 29, 2021 at 9:18 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Thu, Jul 29, 2021 at 5:19 PM Rob Clark <robdclark@gmail.com> wrote:
> >
> > On Thu, Jul 29, 2021 at 12:03 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> > >
> > > On Wed, Jul 28, 2021 at 10:58:51AM -0700, Rob Clark wrote:
> > > > On Wed, Jul 28, 2021 at 10:23 AM Christian König
> > > > <christian.koenig@amd.com> wrote:
> > > > >
> > > > >
> > > > >
> > > > > Am 28.07.21 um 17:15 schrieb Rob Clark:
> > > > > > On Wed, Jul 28, 2021 at 4:37 AM Christian König
> > > > > > <ckoenig.leichtzumerken@gmail.com> wrote:
> > > > > >> Am 28.07.21 um 09:03 schrieb Christian König:
> > > > > >>> Am 27.07.21 um 16:25 schrieb Rob Clark:
> > > > > >>>> On Tue, Jul 27, 2021 at 12:11 AM Christian König
> > > > > >>>> <ckoenig.leichtzumerken@gmail.com> wrote:
> > > > > >>>>> Am 27.07.21 um 01:38 schrieb Rob Clark:
> > > > > >>>>>> From: Rob Clark <robdclark@chromium.org>
> > > > > >>>>>>
> > > > > >>>>>> Add a way to hint to the fence signaler of an upcoming deadline,
> > > > > >>>>>> such as
> > > > > >>>>>> vblank, which the fence waiter would prefer not to miss. This is to
> > > > > >>>>>> aid
> > > > > >>>>>> the fence signaler in making power management decisions, like boosting
> > > > > >>>>>> frequency as the deadline approaches and awareness of missing
> > > > > >>>>>> deadlines
> > > > > >>>>>> so that can be factored in to the frequency scaling.
> > > > > >>>>>>
> > > > > >>>>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > > > >>>>>> ---
> > > > > >>>>>>     drivers/dma-buf/dma-fence.c | 39
> > > > > >>>>>> +++++++++++++++++++++++++++++++++++++
> > > > > >>>>>>     include/linux/dma-fence.h   | 17 ++++++++++++++++
> > > > > >>>>>>     2 files changed, 56 insertions(+)
> > > > > >>>>>>
> > > > > >>>>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> > > > > >>>>>> index ce0f5eff575d..2e0d25ab457e 100644
> > > > > >>>>>> --- a/drivers/dma-buf/dma-fence.c
> > > > > >>>>>> +++ b/drivers/dma-buf/dma-fence.c
> > > > > >>>>>> @@ -910,6 +910,45 @@ dma_fence_wait_any_timeout(struct dma_fence
> > > > > >>>>>> **fences, uint32_t count,
> > > > > >>>>>>     }
> > > > > >>>>>>     EXPORT_SYMBOL(dma_fence_wait_any_timeout);
> > > > > >>>>>>
> > > > > >>>>>> +
> > > > > >>>>>> +/**
> > > > > >>>>>> + * dma_fence_set_deadline - set desired fence-wait deadline
> > > > > >>>>>> + * @fence:    the fence that is to be waited on
> > > > > >>>>>> + * @deadline: the time by which the waiter hopes for the fence to be
> > > > > >>>>>> + *            signaled
> > > > > >>>>>> + *
> > > > > >>>>>> + * Inform the fence signaler of an upcoming deadline, such as
> > > > > >>>>>> vblank, by
> > > > > >>>>>> + * which point the waiter would prefer the fence to be signaled
> > > > > >>>>>> by.  This
> > > > > >>>>>> + * is intended to give feedback to the fence signaler to aid in power
> > > > > >>>>>> + * management decisions, such as boosting GPU frequency if a periodic
> > > > > >>>>>> + * vblank deadline is approaching.
> > > > > >>>>>> + */
> > > > > >>>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
> > > > > >>>>>> deadline)
> > > > > >>>>>> +{
> > > > > >>>>>> +     unsigned long flags;
> > > > > >>>>>> +
> > > > > >>>>>> +     if (dma_fence_is_signaled(fence))
> > > > > >>>>>> +             return;
> > > > > >>>>>> +
> > > > > >>>>>> +     spin_lock_irqsave(fence->lock, flags);
> > > > > >>>>>> +
> > > > > >>>>>> +     /* If we already have an earlier deadline, keep it: */
> > > > > >>>>>> +     if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags) &&
> > > > > >>>>>> +         ktime_before(fence->deadline, deadline)) {
> > > > > >>>>>> +             spin_unlock_irqrestore(fence->lock, flags);
> > > > > >>>>>> +             return;
> > > > > >>>>>> +     }
> > > > > >>>>>> +
> > > > > >>>>>> +     fence->deadline = deadline;
> > > > > >>>>>> +     set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags);
> > > > > >>>>>> +
> > > > > >>>>>> +     spin_unlock_irqrestore(fence->lock, flags);
> > > > > >>>>>> +
> > > > > >>>>>> +     if (fence->ops->set_deadline)
> > > > > >>>>>> +             fence->ops->set_deadline(fence, deadline);
> > > > > >>>>>> +}
> > > > > >>>>>> +EXPORT_SYMBOL(dma_fence_set_deadline);
> > > > > >>>>>> +
> > > > > >>>>>>     /**
> > > > > >>>>>>      * dma_fence_init - Initialize a custom fence.
> > > > > >>>>>>      * @fence: the fence to initialize
> > > > > >>>>>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> > > > > >>>>>> index 6ffb4b2c6371..4e6cfe4e6fbc 100644
> > > > > >>>>>> --- a/include/linux/dma-fence.h
> > > > > >>>>>> +++ b/include/linux/dma-fence.h
> > > > > >>>>>> @@ -88,6 +88,7 @@ struct dma_fence {
> > > > > >>>>>>                 /* @timestamp replaced by @rcu on
> > > > > >>>>>> dma_fence_release() */
> > > > > >>>>>>                 struct rcu_head rcu;
> > > > > >>>>>>         };
> > > > > >>>>>> +     ktime_t deadline;
> > > > > >>>>> Mhm, adding the flag sounds ok to me but I'm a bit hesitating adding
> > > > > >>>>> the
> > > > > >>>>> deadline as extra field here.
> > > > > >>>>>
> > > > > >>>>> We tuned the dma_fence structure intentionally so that it is only 64
> > > > > >>>>> bytes.
> > > > > >>>> Hmm, then I guess you wouldn't be a fan of also adding an hrtimer?
> > > > > >>>>
> > > > > >>>> We could push the ktime_t (and timer) down into the derived fence
> > > > > >>>> class, but I think there is going to need to be some extra storage
> > > > > >>>> *somewhere*.. maybe the fence signaler could get away with just
> > > > > >>>> storing the nearest upcoming deadline per fence-context instead?
> > > > > >>> I would just push that into the driver instead.
> > > > > >>>
> > > > > >>> You most likely don't want the deadline per fence anyway in complex
> > > > > >>> scenarios, but rather per frame. And a frame is usually composed from
> > > > > >>> multiple fences.
> > > > > > Right, I ended up keeping track of the nearest deadline in patch 5/4
> > > > > > which added drm/msm support:
> > > > > >
> > > > > >    https://patchwork.freedesktop.org/patch/447138/
> > > > > >
> > > > > > But if we do have the ktime_t in dma_fence, we can add
> > > > > > some checks and avoid calling back to the driver if a later deadline
> > > > > > is set on a fence that already has an earlier deadline.  OTOH I
> > > > > > suppose I can push all that back to the driver to start, and we can
> > > > > > revisit once we have more drivers implementing deadline support.
> > > > >
> > > > > I still think that all of this is rather specific to your use case and
> > > > > have strong doubt that anybody else will implement that.
> > > >
> > > > i915 does already have a similar thing in its hand-rolled atomic
> > > > commit path.  So I think msm won't be the only one.  It should be also
> > > > useful to the other mobile GPUs with a gpu vs kms driver split,
> > > > although looking at the other gpu devfreq implementations, I don't
> > > > think they've yet gotten to this point in the fine tuning..
> > >
> > > Yeah I have a dream that maybe i915 will use the atomic commit helpers, I
> > > originally wrote them with i915 in mind :-) even had patches!
> > >
> > > I also think we'll need this eventually in other areas, Android also has
> > > some hacks like this to make sure idle->first touch doesn't suck and
> > > similar things.
> >
> > input-boost is another thing I have on my roadmap.. part of the solution is:
> >
> >     commit 9bc95570175a7fbca29d86d22c54bbf399f4ad5a
> >     Author:     Rob Clark <robdclark@chromium.org>
> >     AuthorDate: Mon Jul 26 07:46:50 2021 -0700
> >     Commit:     Rob Clark <robdclark@chromium.org>
> >     CommitDate: Tue Jul 27 17:54:36 2021 -0700
> >
> >         drm/msm: Devfreq tuning
> >
> > which gives the freq a bit of a nudge if the GPU has been idle for
> > longer than a certain threshold.
> >
> > But the other part is that if the GPU has been idle for more than 66ms
> > (typical autosuspend delay for adreno) it will suspend.  For modern
> > adreno's it takes ~2ms to "boot up" the GPU from suspend.  Which is
> > something you want to take out of the submit/execbuf path if you are
> > trying to reduce input-to-pageflip latency.
> >
> > We have a downstream patch that boosts the CPUs on input events (with
> > a cooldown period to prevent spacebar-heater) and I have been thinking
> > of something along those lines to trigger resuming the GPU.. it is
> > straightforward enough for touch based devices, but gets more
> > complicated with keyboard input.  In particular, some keys you want to
> > trigger boost on key-release.  Ie. modifier keys (ctrl/shift/alt/etc..
> > the "search" key on chromebooks, etc) you want to boost on
> > key-release, not on key-press because unless you type *really* fast
> > you'll be in the cooldown period when the key-release event happens.
> > Unfortunately the kernel doesn't really know this "policy" sort of
> > information about which keys should boost on press vs release.  So I
> > think the long-term/upstream solution is to do input-boost in
> > userspace.. sysfs already has all the knobs that a userspace
> > input-boost daemon would need to twiddle, so no real need for this to
> > be in the kernel.  I guess the only drawback is the sysfs knobs are a
> > bit less standardized on the "desktop GPUs" which don't use devfreq.
>
> I think we could do a standard interface for this, either on the drm
> owner/master or somewhere in sysfs. Essentially "I expect to use the
> gpu for the very next frame, get it going". Across all hw there's a
> lot of things we can do. I think abuse is pretty easy to prevent with
> a cooldown or similar.

The userspace input-boost ends up needing to be either part of the
compositor or a privileged process in order to sniff input events, so
I don't think the kernel needs to try to prevent abuse here (but the
userspace part definitely wants a cooldown period).

BR,
-R

> -Daniel
>
> >
> > BR,
> > -R
> >
> > > -Daniel
> > >
> > > >
> > > > BR,
> > > > -R
> > > >
> > > > > >> Thinking more about it we could probably kill the spinlock pointer and
> > > > > >> make the flags 32bit if we absolutely need that here.
> > > > > > If we had a 'struct dma_fence_context' we could push the spinlock, ops
> > > > > > pointer, and u64 context into that and replace with a single
> > > > > > dma_fence_context ptr, fwiw
> > > > >
> > > > > That won't work. We have a lot of use cases where you can't allocate
> > > > > memory, but must allocate a context.
> > > > >
> > > > > Christian.
> > > > >
> > > > > >
> > > > > > BR,
> > > > > > -R
> > > > > >
> > > > > >> But I still don't see the need for that, especially since most drivers
> > > > > >> probably won't implement it.
> > > > > >>
> > > > > >> Regards,
> > > > > >> Christian.
> > > > > >>
> > > > > >>> Regards,
> > > > > >>> Christian.
> > > > > >>>
> > > > > >>>> BR,
> > > > > >>>> -R
> > > > > >>>>
> > > > > >>>>> Regards,
> > > > > >>>>> Christian.
> > > > > >>>>>
> > > > > >>>>>>         u64 context;
> > > > > >>>>>>         u64 seqno;
> > > > > >>>>>>         unsigned long flags;
> > > > > >>>>>> @@ -99,6 +100,7 @@ enum dma_fence_flag_bits {
> > > > > >>>>>>         DMA_FENCE_FLAG_SIGNALED_BIT,
> > > > > >>>>>>         DMA_FENCE_FLAG_TIMESTAMP_BIT,
> > > > > >>>>>>         DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> > > > > >>>>>> +     DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> > > > > >>>>>>         DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
> > > > > >>>>>>     };
> > > > > >>>>>>
> > > > > >>>>>> @@ -261,6 +263,19 @@ struct dma_fence_ops {
> > > > > >>>>>>          */
> > > > > >>>>>>         void (*timeline_value_str)(struct dma_fence *fence,
> > > > > >>>>>>                                    char *str, int size);
> > > > > >>>>>> +
> > > > > >>>>>> +     /**
> > > > > >>>>>> +      * @set_deadline:
> > > > > >>>>>> +      *
> > > > > >>>>>> +      * Callback to allow a fence waiter to inform the fence
> > > > > >>>>>> signaler of an
> > > > > >>>>>> +      * upcoming deadline, such as vblank, by which point the
> > > > > >>>>>> waiter would
> > > > > >>>>>> +      * prefer the fence to be signaled by.  This is intended to
> > > > > >>>>>> give feedback
> > > > > >>>>>> +      * to the fence signaler to aid in power management
> > > > > >>>>>> decisions, such as
> > > > > >>>>>> +      * boosting GPU frequency.
> > > > > >>>>>> +      *
> > > > > >>>>>> +      * This callback is optional.
> > > > > >>>>>> +      */
> > > > > >>>>>> +     void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
> > > > > >>>>>>     };
> > > > > >>>>>>
> > > > > >>>>>>     void dma_fence_init(struct dma_fence *fence, const struct
> > > > > >>>>>> dma_fence_ops *ops,
> > > > > >>>>>> @@ -586,6 +601,8 @@ static inline signed long dma_fence_wait(struct
> > > > > >>>>>> dma_fence *fence, bool intr)
> > > > > >>>>>>         return ret < 0 ? ret : 0;
> > > > > >>>>>>     }
> > > > > >>>>>>
> > > > > >>>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t
> > > > > >>>>>> deadline);
> > > > > >>>>>> +
> > > > > >>>>>>     struct dma_fence *dma_fence_get_stub(void);
> > > > > >>>>>>     struct dma_fence *dma_fence_allocate_private_stub(void);
> > > > > >>>>>>     u64 dma_fence_context_alloc(unsigned num);
> > > > >
> > >
> > > --
> > > Daniel Vetter
> > > Software Engineer, Intel Corporation
> > > http://blog.ffwll.ch
>
>
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
diff mbox series

Patch

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index ce0f5eff575d..2e0d25ab457e 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -910,6 +910,45 @@  dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
 }
 EXPORT_SYMBOL(dma_fence_wait_any_timeout);
 
+
+/**
+ * dma_fence_set_deadline - set desired fence-wait deadline
+ * @fence:    the fence that is to be waited on
+ * @deadline: the time by which the waiter hopes for the fence to be
+ *            signaled
+ *
+ * Inform the fence signaler of an upcoming deadline, such as vblank, by
+ * which point the waiter would prefer the fence to be signaled by.  This
+ * is intended to give feedback to the fence signaler to aid in power
+ * management decisions, such as boosting GPU frequency if a periodic
+ * vblank deadline is approaching.
+ */
+void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
+{
+	unsigned long flags;
+
+	if (dma_fence_is_signaled(fence))
+		return;
+
+	spin_lock_irqsave(fence->lock, flags);
+
+	/* If we already have an earlier deadline, keep it: */
+	if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags) &&
+	    ktime_before(fence->deadline, deadline)) {
+		spin_unlock_irqrestore(fence->lock, flags);
+		return;
+	}
+
+	fence->deadline = deadline;
+	set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &fence->flags);
+
+	spin_unlock_irqrestore(fence->lock, flags);
+
+	if (fence->ops->set_deadline)
+		fence->ops->set_deadline(fence, deadline);
+}
+EXPORT_SYMBOL(dma_fence_set_deadline);
+
 /**
  * dma_fence_init - Initialize a custom fence.
  * @fence: the fence to initialize
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 6ffb4b2c6371..4e6cfe4e6fbc 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -88,6 +88,7 @@  struct dma_fence {
 		/* @timestamp replaced by @rcu on dma_fence_release() */
 		struct rcu_head rcu;
 	};
+	ktime_t deadline;
 	u64 context;
 	u64 seqno;
 	unsigned long flags;
@@ -99,6 +100,7 @@  enum dma_fence_flag_bits {
 	DMA_FENCE_FLAG_SIGNALED_BIT,
 	DMA_FENCE_FLAG_TIMESTAMP_BIT,
 	DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+	DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
 	DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
 };
 
@@ -261,6 +263,19 @@  struct dma_fence_ops {
 	 */
 	void (*timeline_value_str)(struct dma_fence *fence,
 				   char *str, int size);
+
+	/**
+	 * @set_deadline:
+	 *
+	 * Callback to allow a fence waiter to inform the fence signaler of an
+	 * upcoming deadline, such as vblank, by which point the waiter would
+	 * prefer the fence to be signaled by.  This is intended to give feedback
+	 * to the fence signaler to aid in power management decisions, such as
+	 * boosting GPU frequency.
+	 *
+	 * This callback is optional.
+	 */
+	void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
 };
 
 void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
@@ -586,6 +601,8 @@  static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
 	return ret < 0 ? ret : 0;
 }
 
+void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline);
+
 struct dma_fence *dma_fence_get_stub(void);
 struct dma_fence *dma_fence_allocate_private_stub(void);
 u64 dma_fence_context_alloc(unsigned num);
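
To illustrate the intended usage from the waiter side, a minimal
sketch of how a KMS driver's atomic commit path might feed an upcoming
vblank into the new API (not part of this patch; the helper name and
the crude one-frame-at-60Hz deadline estimate are placeholders for
real vblank timing):

static void example_set_vblank_deadlines(struct drm_atomic_state *state)
{
	struct drm_plane *plane;
	struct drm_plane_state *new_state;
	/* placeholder: assume the next vblank is roughly one 60Hz frame away */
	ktime_t deadline = ktime_add_us(ktime_get(), 16667);
	int i;

	for_each_new_plane_in_state(state, plane, new_state, i) {
		if (new_state->fence)
			dma_fence_set_deadline(new_state->fence, deadline);
	}
}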