Message ID | 20241212202337.381614-3-mcanal@igalia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/vc4: Clean-up the BO seqnos | expand |
On 12/12, Maíra Canal wrote: > As the BOs used by VC4 have DMA Reservation Objects attached to it, > there is no need to use seqnos wait for the BOs availability. Instead, > we can use `dma_gem_dma_resv_wait()`. typo: drm_gem_dma_resc_wait() > > Signed-off-by: Maíra Canal <mcanal@igalia.com> > --- > drivers/gpu/drm/vc4/vc4_drv.h | 10 ++++++++++ > drivers/gpu/drm/vc4/vc4_gem.c | 31 +++++++++++++++++++------------ > 2 files changed, 29 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h > index 4a078ffd9f82..03ed40ab9a93 100644 > --- a/drivers/gpu/drm/vc4/vc4_drv.h > +++ b/drivers/gpu/drm/vc4/vc4_drv.h > @@ -924,6 +924,16 @@ struct vc4_validated_shader_info { > (Wmax)) > #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) > > +static inline unsigned long nsecs_to_jiffies_timeout(const u64 n) > +{ > + /* nsecs_to_jiffies64() does not guard against overflow */ > + if ((NSEC_PER_SEC % HZ) != 0 && > + div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ) > + return MAX_JIFFY_OFFSET; > + > + return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1); > +} > + Should nsecs_to_jiffies_timeout become common code? > /* vc4_bo.c */ > struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size); > struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size, > diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c > index 1021f45cb53c..4037c65eb269 100644 > --- a/drivers/gpu/drm/vc4/vc4_gem.c > +++ b/drivers/gpu/drm/vc4/vc4_gem.c > @@ -1017,11 +1017,13 @@ int > vc4_wait_bo_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv) > { > - struct vc4_dev *vc4 = to_vc4_dev(dev); > int ret; > + struct vc4_dev *vc4 = to_vc4_dev(dev); > struct drm_vc4_wait_bo *args = data; > - struct drm_gem_object *gem_obj; > - struct vc4_bo *bo; > + unsigned long timeout_jiffies = > + nsecs_to_jiffies_timeout(args->timeout_ns); > + ktime_t start = ktime_get(); > + u64 delta_ns; > > if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4)) > return -ENODEV; > @@ -1029,17 +1031,22 @@ vc4_wait_bo_ioctl(struct drm_device *dev, void *data, > if (args->pad != 0) > return -EINVAL; > > - gem_obj = drm_gem_object_lookup(file_priv, args->handle); > - if (!gem_obj) { > - DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); > - return -EINVAL; > - } > - bo = to_vc4_bo(gem_obj); > + ret = drm_gem_dma_resv_wait(file_priv, args->handle, > + true, timeout_jiffies); > > - ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, > - &args->timeout_ns); > + /* Decrement the user's timeout, in case we got interrupted > + * such that the ioctl will be restarted. > + */ > + delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start)); > + if (delta_ns < args->timeout_ns) > + args->timeout_ns -= delta_ns; > + else > + args->timeout_ns = 0; > + > + /* Asked to wait beyond the jiffy/scheduler precision? */ > + if (ret == -ETIME && args->timeout_ns) > + ret = -EAGAIN; > > - drm_gem_object_put(gem_obj); Just left some comments above. This change sounds sensible and the patch LGTM. Thanks! Reviewed-by: Melissa Wen <mwen@igalia.com> > return ret; > } > > -- > 2.47.1 >
On 12/12/2024 20:20, Maíra Canal wrote: > As the BOs used by VC4 have DMA Reservation Objects attached to it, > there is no need to use seqnos wait for the BOs availability. Instead, > we can use `dma_gem_dma_resv_wait()`. > > Signed-off-by: Maíra Canal <mcanal@igalia.com> > --- > drivers/gpu/drm/vc4/vc4_drv.h | 10 ++++++++++ > drivers/gpu/drm/vc4/vc4_gem.c | 31 +++++++++++++++++++------------ > 2 files changed, 29 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h > index 4a078ffd9f82..03ed40ab9a93 100644 > --- a/drivers/gpu/drm/vc4/vc4_drv.h > +++ b/drivers/gpu/drm/vc4/vc4_drv.h > @@ -924,6 +924,16 @@ struct vc4_validated_shader_info { > (Wmax)) > #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) > > +static inline unsigned long nsecs_to_jiffies_timeout(const u64 n) > +{ > + /* nsecs_to_jiffies64() does not guard against overflow */ > + if ((NSEC_PER_SEC % HZ) != 0 && > + div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ) > + return MAX_JIFFY_OFFSET; > + > + return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1); > +} > + > /* vc4_bo.c */ > struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size); > struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size, > diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c > index 1021f45cb53c..4037c65eb269 100644 > --- a/drivers/gpu/drm/vc4/vc4_gem.c > +++ b/drivers/gpu/drm/vc4/vc4_gem.c > @@ -1017,11 +1017,13 @@ int > vc4_wait_bo_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv) > { > - struct vc4_dev *vc4 = to_vc4_dev(dev); > int ret; > + struct vc4_dev *vc4 = to_vc4_dev(dev); > struct drm_vc4_wait_bo *args = data; > - struct drm_gem_object *gem_obj; > - struct vc4_bo *bo; > + unsigned long timeout_jiffies = > + nsecs_to_jiffies_timeout(args->timeout_ns); What about a potentially simpler alternative: timeout_jiffies = usecs_to_jiffies(div_u64(args->timeout_ns, 1000)); No need for a new helper and code cannot sleep for less than one jiffy anyway. > + ktime_t start = ktime_get(); > + u64 delta_ns; > > if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4)) > return -ENODEV; > @@ -1029,17 +1031,22 @@ vc4_wait_bo_ioctl(struct drm_device *dev, void *data, > if (args->pad != 0) > return -EINVAL; > > - gem_obj = drm_gem_object_lookup(file_priv, args->handle); > - if (!gem_obj) { > - DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); > - return -EINVAL; > - } > - bo = to_vc4_bo(gem_obj); > + ret = drm_gem_dma_resv_wait(file_priv, args->handle, > + true, timeout_jiffies); > > - ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, > - &args->timeout_ns); > + /* Decrement the user's timeout, in case we got interrupted > + * such that the ioctl will be restarted. > + */ > + delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start)); > + if (delta_ns < args->timeout_ns) > + args->timeout_ns -= delta_ns; > + else > + args->timeout_ns = 0; > + > + /* Asked to wait beyond the jiffy/scheduler precision? */ > + if (ret == -ETIME && args->timeout_ns) > + ret = -EAGAIN; EAGAIN is new from here, right? Will userspace cope in the right way? Regards, Tvrtko > > - drm_gem_object_put(gem_obj); > return ret; > } >
Hi Tvrtko, On 18/12/24 07:41, Tvrtko Ursulin wrote: > > On 12/12/2024 20:20, Maíra Canal wrote: >> As the BOs used by VC4 have DMA Reservation Objects attached to it, >> there is no need to use seqnos wait for the BOs availability. Instead, >> we can use `dma_gem_dma_resv_wait()`. >> >> Signed-off-by: Maíra Canal <mcanal@igalia.com> >> --- >> drivers/gpu/drm/vc4/vc4_drv.h | 10 ++++++++++ >> drivers/gpu/drm/vc4/vc4_gem.c | 31 +++++++++++++++++++------------ >> 2 files changed, 29 insertions(+), 12 deletions(-) >> >> diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/ >> vc4_drv.h >> index 4a078ffd9f82..03ed40ab9a93 100644 >> --- a/drivers/gpu/drm/vc4/vc4_drv.h >> +++ b/drivers/gpu/drm/vc4/vc4_drv.h >> @@ -924,6 +924,16 @@ struct vc4_validated_shader_info { >> (Wmax)) >> #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, >> 1000) >> +static inline unsigned long nsecs_to_jiffies_timeout(const u64 n) >> +{ >> + /* nsecs_to_jiffies64() does not guard against overflow */ >> + if ((NSEC_PER_SEC % HZ) != 0 && >> + div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ) >> + return MAX_JIFFY_OFFSET; >> + >> + return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1); >> +} >> + >> /* vc4_bo.c */ >> struct drm_gem_object *vc4_create_object(struct drm_device *dev, >> size_t size); >> struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size, >> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/ >> vc4_gem.c >> index 1021f45cb53c..4037c65eb269 100644 >> --- a/drivers/gpu/drm/vc4/vc4_gem.c >> +++ b/drivers/gpu/drm/vc4/vc4_gem.c >> @@ -1017,11 +1017,13 @@ int >> vc4_wait_bo_ioctl(struct drm_device *dev, void *data, >> struct drm_file *file_priv) >> { >> - struct vc4_dev *vc4 = to_vc4_dev(dev); >> int ret; >> + struct vc4_dev *vc4 = to_vc4_dev(dev); >> struct drm_vc4_wait_bo *args = data; >> - struct drm_gem_object *gem_obj; >> - struct vc4_bo *bo; >> + unsigned long timeout_jiffies = >> + nsecs_to_jiffies_timeout(args->timeout_ns); > > What about a potentially simpler alternative: > > timeout_jiffies = usecs_to_jiffies(div_u64(args->timeout_ns, 1000)); > > No need for a new helper and code cannot sleep for less than one jiffy > anyway. I ended up deciding to use a function available on "drm_utils.h", `drm_timeout_abs_to_jiffies()`, which seems to deal appropriately with overflows. > >> + ktime_t start = ktime_get(); >> + u64 delta_ns; >> if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4)) >> return -ENODEV; >> @@ -1029,17 +1031,22 @@ vc4_wait_bo_ioctl(struct drm_device *dev, void >> *data, >> if (args->pad != 0) >> return -EINVAL; >> - gem_obj = drm_gem_object_lookup(file_priv, args->handle); >> - if (!gem_obj) { >> - DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); >> - return -EINVAL; >> - } >> - bo = to_vc4_bo(gem_obj); >> + ret = drm_gem_dma_resv_wait(file_priv, args->handle, >> + true, timeout_jiffies); >> - ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, >> - &args->timeout_ns); >> + /* Decrement the user's timeout, in case we got interrupted >> + * such that the ioctl will be restarted. >> + */ >> + delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start)); >> + if (delta_ns < args->timeout_ns) >> + args->timeout_ns -= delta_ns; >> + else >> + args->timeout_ns = 0; >> + >> + /* Asked to wait beyond the jiffy/scheduler precision? */ >> + if (ret == -ETIME && args->timeout_ns) >> + ret = -EAGAIN; > > EAGAIN is new from here, right? Will userspace cope in the right way? Thanks for noticing! I'll address it in the next version. Best Regards, - Maíra > > Regards, > > Tvrtko > >> - drm_gem_object_put(gem_obj); >> return ret; >> }
On 18/12/2024 11:36, Maíra Canal wrote: > Hi Tvrtko, > > On 18/12/24 07:41, Tvrtko Ursulin wrote: >> >> On 12/12/2024 20:20, Maíra Canal wrote: >>> As the BOs used by VC4 have DMA Reservation Objects attached to it, >>> there is no need to use seqnos wait for the BOs availability. Instead, >>> we can use `dma_gem_dma_resv_wait()`. >>> >>> Signed-off-by: Maíra Canal <mcanal@igalia.com> >>> --- >>> drivers/gpu/drm/vc4/vc4_drv.h | 10 ++++++++++ >>> drivers/gpu/drm/vc4/vc4_gem.c | 31 +++++++++++++++++++------------ >>> 2 files changed, 29 insertions(+), 12 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/ >>> vc4_drv.h >>> index 4a078ffd9f82..03ed40ab9a93 100644 >>> --- a/drivers/gpu/drm/vc4/vc4_drv.h >>> +++ b/drivers/gpu/drm/vc4/vc4_drv.h >>> @@ -924,6 +924,16 @@ struct vc4_validated_shader_info { >>> (Wmax)) >>> #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, >>> 10, 1000) >>> +static inline unsigned long nsecs_to_jiffies_timeout(const u64 n) >>> +{ >>> + /* nsecs_to_jiffies64() does not guard against overflow */ >>> + if ((NSEC_PER_SEC % HZ) != 0 && >>> + div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ) >>> + return MAX_JIFFY_OFFSET; >>> + >>> + return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1); >>> +} >>> + >>> /* vc4_bo.c */ >>> struct drm_gem_object *vc4_create_object(struct drm_device *dev, >>> size_t size); >>> struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size, >>> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/ >>> vc4_gem.c >>> index 1021f45cb53c..4037c65eb269 100644 >>> --- a/drivers/gpu/drm/vc4/vc4_gem.c >>> +++ b/drivers/gpu/drm/vc4/vc4_gem.c >>> @@ -1017,11 +1017,13 @@ int >>> vc4_wait_bo_ioctl(struct drm_device *dev, void *data, >>> struct drm_file *file_priv) >>> { >>> - struct vc4_dev *vc4 = to_vc4_dev(dev); >>> int ret; >>> + struct vc4_dev *vc4 = to_vc4_dev(dev); >>> struct drm_vc4_wait_bo *args = data; >>> - struct drm_gem_object *gem_obj; >>> - struct vc4_bo *bo; >>> + unsigned long timeout_jiffies = >>> + nsecs_to_jiffies_timeout(args->timeout_ns); >> >> What about a potentially simpler alternative: >> >> timeout_jiffies = usecs_to_jiffies(div_u64(args->timeout_ns, 1000)); >> >> No need for a new helper and code cannot sleep for less than one jiffy >> anyway. > > I ended up deciding to use a function available on "drm_utils.h", > `drm_timeout_abs_to_jiffies()`, which seems to deal appropriately with > overflows. You will convert relative to absolute to be able to use it? At that point usecs_to_jiffies() might still be more elegant. Regards, Tvrtko > >> >>> + ktime_t start = ktime_get(); >>> + u64 delta_ns; >>> if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4)) >>> return -ENODEV; >>> @@ -1029,17 +1031,22 @@ vc4_wait_bo_ioctl(struct drm_device *dev, >>> void *data, >>> if (args->pad != 0) >>> return -EINVAL; >>> - gem_obj = drm_gem_object_lookup(file_priv, args->handle); >>> - if (!gem_obj) { >>> - DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); >>> - return -EINVAL; >>> - } >>> - bo = to_vc4_bo(gem_obj); >>> + ret = drm_gem_dma_resv_wait(file_priv, args->handle, >>> + true, timeout_jiffies); >>> - ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, >>> - &args->timeout_ns); >>> + /* Decrement the user's timeout, in case we got interrupted >>> + * such that the ioctl will be restarted. >>> + */ >>> + delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start)); >>> + if (delta_ns < args->timeout_ns) >>> + args->timeout_ns -= delta_ns; >>> + else >>> + args->timeout_ns = 0; >>> + >>> + /* Asked to wait beyond the jiffy/scheduler precision? */ >>> + if (ret == -ETIME && args->timeout_ns) >>> + ret = -EAGAIN; >> >> EAGAIN is new from here, right? Will userspace cope in the right way? > > Thanks for noticing! I'll address it in the next version. > > Best Regards, > - Maíra > >> >> Regards, >> >> Tvrtko >> >>> - drm_gem_object_put(gem_obj); >>> return ret; >>> } >
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 4a078ffd9f82..03ed40ab9a93 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -924,6 +924,16 @@ struct vc4_validated_shader_info { (Wmax)) #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) +static inline unsigned long nsecs_to_jiffies_timeout(const u64 n) +{ + /* nsecs_to_jiffies64() does not guard against overflow */ + if ((NSEC_PER_SEC % HZ) != 0 && + div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ) + return MAX_JIFFY_OFFSET; + + return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1); +} + /* vc4_bo.c */ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size); struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size, diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 1021f45cb53c..4037c65eb269 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -1017,11 +1017,13 @@ int vc4_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct vc4_dev *vc4 = to_vc4_dev(dev); int ret; + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_wait_bo *args = data; - struct drm_gem_object *gem_obj; - struct vc4_bo *bo; + unsigned long timeout_jiffies = + nsecs_to_jiffies_timeout(args->timeout_ns); + ktime_t start = ktime_get(); + u64 delta_ns; if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4)) return -ENODEV; @@ -1029,17 +1031,22 @@ vc4_wait_bo_ioctl(struct drm_device *dev, void *data, if (args->pad != 0) return -EINVAL; - gem_obj = drm_gem_object_lookup(file_priv, args->handle); - if (!gem_obj) { - DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); - return -EINVAL; - } - bo = to_vc4_bo(gem_obj); + ret = drm_gem_dma_resv_wait(file_priv, args->handle, + true, timeout_jiffies); - ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, - &args->timeout_ns); + /* Decrement the user's timeout, in case we got interrupted + * such that the ioctl will be restarted. + */ + delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start)); + if (delta_ns < args->timeout_ns) + args->timeout_ns -= delta_ns; + else + args->timeout_ns = 0; + + /* Asked to wait beyond the jiffy/scheduler precision? */ + if (ret == -ETIME && args->timeout_ns) + ret = -EAGAIN; - drm_gem_object_put(gem_obj); return ret; }
As the BOs used by VC4 have DMA Reservation Objects attached to it, there is no need to use seqnos wait for the BOs availability. Instead, we can use `dma_gem_dma_resv_wait()`. Signed-off-by: Maíra Canal <mcanal@igalia.com> --- drivers/gpu/drm/vc4/vc4_drv.h | 10 ++++++++++ drivers/gpu/drm/vc4/vc4_gem.c | 31 +++++++++++++++++++------------ 2 files changed, 29 insertions(+), 12 deletions(-)