Message ID | 20210909194917.66433-1-jose.souza@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [CI,1/2] drm/i915/display/skl+: Drop frontbuffer rendering support | expand |
On Thu, Sep 09, 2021 at 12:49:16PM -0700, José Roberto de Souza wrote: > By now all the userspace applications should have migrated to atomic > or at least be calling DRM_IOCTL_MODE_DIRTYFB. > > With that we can kill frontbuffer rendering support in i915 for > modern platforms. > > So here converting legacy APIs into atomic commits so it can be > properly handled by driver i915. > > Several IGT tests will fail with this changes, because some tests > were stressing those frontbuffer rendering scenarios that no userspace > should be using by now, fixes to IGT should be sent soon. > I just gave this a try here and it's unusable. glxgears went from 9000 to 120 fps (was expecting 60fps tbh, not sure why I get double), everything lags like mad, if I drag a window around glxgears/other windows stop updating entirely, etc. NAK > v2: > - return earlier to not set fb_tracking.busy/flip_bits > - added a warn on to make sure we are not setting the busy/flip_bits > > Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com> > Cc: Daniel Vetter <daniel@ffwll.ch> > Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> > Cc: Jani Nikula <jani.nikula@intel.com> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> > Signed-off-by: José Roberto de Souza <jose.souza@intel.com> > --- > drivers/gpu/drm/i915/display/intel_cursor.c | 6 ++---- > drivers/gpu/drm/i915/display/intel_fb.c | 8 +++++++- > .../gpu/drm/i915/display/intel_frontbuffer.c | 18 ++++++++++++++++++ > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > 4 files changed, 29 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c > index c7618fef01439..5aa996c3b7980 100644 > --- a/drivers/gpu/drm/i915/display/intel_cursor.c > +++ b/drivers/gpu/drm/i915/display/intel_cursor.c > @@ -617,6 +617,7 @@ intel_legacy_cursor_update(struct drm_plane *_plane, > u32 src_w, u32 src_h, > struct drm_modeset_acquire_ctx *ctx) > { > + struct drm_i915_private *i915 = to_i915(_crtc->dev); > struct intel_plane *plane = to_intel_plane(_plane); > struct intel_crtc *crtc = to_intel_crtc(_crtc); > struct intel_plane_state *old_plane_state = > @@ -633,12 +634,9 @@ intel_legacy_cursor_update(struct drm_plane *_plane, > * PSR2 selective fetch also requires the slow path as > * PSR2 plane and transcoder registers can only be updated during > * vblank. > - * > - * FIXME bigjoiner fastpath would be good > */ > if (!crtc_state->hw.active || intel_crtc_needs_modeset(crtc_state) || > - crtc_state->update_pipe || crtc_state->bigjoiner || > - crtc_state->enable_psr2_sel_fetch) > + crtc_state->update_pipe || !HAS_FRONTBUFFER_RENDERING(i915)) > goto slow; > > /* > diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c > index e4b8602ec0cd2..3eb60785c9f29 100644 > --- a/drivers/gpu/drm/i915/display/intel_fb.c > +++ b/drivers/gpu/drm/i915/display/intel_fb.c > @@ -3,6 +3,7 @@ > * Copyright © 2021 Intel Corporation > */ > > +#include <drm/drm_damage_helper.h> > #include <drm/drm_framebuffer.h> > #include <drm/drm_modeset_helper.h> > > @@ -1235,10 +1236,15 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb, > unsigned int num_clips) > { > struct drm_i915_gem_object *obj = intel_fb_obj(fb); > + struct drm_i915_private *i915 = to_i915(obj->base.dev); > > i915_gem_object_flush_if_display(obj); > - intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB); > > + if (!HAS_FRONTBUFFER_RENDERING(i915)) > + return drm_atomic_helper_dirtyfb(fb, file, flags, color, clips, > + num_clips); > + > + intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB); > return 0; > } > > diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c > index 0492446cd04ad..3860f87dac31c 100644 > --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c > +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c > @@ -112,6 +112,9 @@ static void frontbuffer_flush(struct drm_i915_private *i915, > void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, > unsigned frontbuffer_bits) > { > + if (!HAS_FRONTBUFFER_RENDERING(i915)) > + return; > + > spin_lock(&i915->fb_tracking.lock); > i915->fb_tracking.flip_bits |= frontbuffer_bits; > /* Remove stale busy bits due to the old buffer. */ > @@ -132,6 +135,12 @@ void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, > void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, > unsigned frontbuffer_bits) > { > + if (!HAS_FRONTBUFFER_RENDERING(i915)) { > + drm_WARN_ON_ONCE(&i915->drm, i915->fb_tracking.flip_bits | > + i915->fb_tracking.busy_bits); > + return; > + } > + > spin_lock(&i915->fb_tracking.lock); > /* Mask any cancelled flips. */ > frontbuffer_bits &= i915->fb_tracking.flip_bits; > @@ -156,6 +165,9 @@ void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, > void intel_frontbuffer_flip(struct drm_i915_private *i915, > unsigned frontbuffer_bits) > { > + if (!HAS_FRONTBUFFER_RENDERING(i915)) > + return; > + > spin_lock(&i915->fb_tracking.lock); > /* Remove stale busy bits due to the old buffer. */ > i915->fb_tracking.busy_bits &= ~frontbuffer_bits; > @@ -170,6 +182,9 @@ void __intel_fb_invalidate(struct intel_frontbuffer *front, > { > struct drm_i915_private *i915 = to_i915(front->obj->base.dev); > > + if (!HAS_FRONTBUFFER_RENDERING(i915)) > + return; > + > if (origin == ORIGIN_CS) { > spin_lock(&i915->fb_tracking.lock); > i915->fb_tracking.busy_bits |= frontbuffer_bits; > @@ -191,6 +206,9 @@ void __intel_fb_flush(struct intel_frontbuffer *front, > { > struct drm_i915_private *i915 = to_i915(front->obj->base.dev); > > + if (!HAS_FRONTBUFFER_RENDERING(i915)) > + return; > + > if (origin == ORIGIN_CS) { > spin_lock(&i915->fb_tracking.lock); > /* Filter out new bits since rendering started. */ > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 37c1ca266bcdf..3324ba8d8523c 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1699,6 +1699,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, > > #define HAS_ASYNC_FLIPS(i915) (DISPLAY_VER(i915) >= 5) > > +#define HAS_FRONTBUFFER_RENDERING(i915) (DISPLAY_VER(i915) < 9) > + > /* Only valid when HAS_DISPLAY() is true */ > #define INTEL_DISPLAY_ENABLED(dev_priv) \ > (drm_WARN_ON(&(dev_priv)->drm, !HAS_DISPLAY(dev_priv)), !(dev_priv)->params.disable_display) > -- > 2.33.0
On Thu, 2021-09-09 at 23:20 +0300, Ville Syrjälä wrote: > On Thu, Sep 09, 2021 at 12:49:16PM -0700, José Roberto de Souza wrote: > > By now all the userspace applications should have migrated to atomic > > or at least be calling DRM_IOCTL_MODE_DIRTYFB. > > > > With that we can kill frontbuffer rendering support in i915 for > > modern platforms. > > > > So here converting legacy APIs into atomic commits so it can be > > properly handled by driver i915. > > > > Several IGT tests will fail with this changes, because some tests > > were stressing those frontbuffer rendering scenarios that no userspace > > should be using by now, fixes to IGT should be sent soon. > > > > I just gave this a try here and it's unusable. glxgears went from > 9000 to 120 fps (was expecting 60fps tbh, not sure why I get > double), everything lags like mad, if I drag a window around > glxgears/other windows stop updating entirely, etc. NAK Can you share your setup? What GPU? Desktop environment? Mesa version? resolutions of sinks? Will try it in my end. > > > v2: > > - return earlier to not set fb_tracking.busy/flip_bits > > - added a warn on to make sure we are not setting the busy/flip_bits > > > > Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com> > > Cc: Daniel Vetter <daniel@ffwll.ch> > > Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com> > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> > > Cc: Jani Nikula <jani.nikula@intel.com> > > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> > > Signed-off-by: José Roberto de Souza <jose.souza@intel.com> > > --- > > drivers/gpu/drm/i915/display/intel_cursor.c | 6 ++---- > > drivers/gpu/drm/i915/display/intel_fb.c | 8 +++++++- > > .../gpu/drm/i915/display/intel_frontbuffer.c | 18 ++++++++++++++++++ > > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > > 4 files changed, 29 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c > > index c7618fef01439..5aa996c3b7980 100644 > > --- a/drivers/gpu/drm/i915/display/intel_cursor.c > > +++ b/drivers/gpu/drm/i915/display/intel_cursor.c > > @@ -617,6 +617,7 @@ intel_legacy_cursor_update(struct drm_plane *_plane, > > u32 src_w, u32 src_h, > > struct drm_modeset_acquire_ctx *ctx) > > { > > + struct drm_i915_private *i915 = to_i915(_crtc->dev); > > struct intel_plane *plane = to_intel_plane(_plane); > > struct intel_crtc *crtc = to_intel_crtc(_crtc); > > struct intel_plane_state *old_plane_state = > > @@ -633,12 +634,9 @@ intel_legacy_cursor_update(struct drm_plane *_plane, > > * PSR2 selective fetch also requires the slow path as > > * PSR2 plane and transcoder registers can only be updated during > > * vblank. > > - * > > - * FIXME bigjoiner fastpath would be good > > */ > > if (!crtc_state->hw.active || intel_crtc_needs_modeset(crtc_state) || > > - crtc_state->update_pipe || crtc_state->bigjoiner || > > - crtc_state->enable_psr2_sel_fetch) > > + crtc_state->update_pipe || !HAS_FRONTBUFFER_RENDERING(i915)) > > goto slow; > > > > /* > > diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c > > index e4b8602ec0cd2..3eb60785c9f29 100644 > > --- a/drivers/gpu/drm/i915/display/intel_fb.c > > +++ b/drivers/gpu/drm/i915/display/intel_fb.c > > @@ -3,6 +3,7 @@ > > * Copyright © 2021 Intel Corporation > > */ > > > > +#include <drm/drm_damage_helper.h> > > #include <drm/drm_framebuffer.h> > > #include <drm/drm_modeset_helper.h> > > > > @@ -1235,10 +1236,15 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb, > > unsigned int num_clips) > > { > > struct drm_i915_gem_object *obj = intel_fb_obj(fb); > > + struct drm_i915_private *i915 = to_i915(obj->base.dev); > > > > i915_gem_object_flush_if_display(obj); > > - intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB); > > > > + if (!HAS_FRONTBUFFER_RENDERING(i915)) > > + return drm_atomic_helper_dirtyfb(fb, file, flags, color, clips, > > + num_clips); > > + > > + intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB); > > return 0; > > } > > > > diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c > > index 0492446cd04ad..3860f87dac31c 100644 > > --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c > > +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c > > @@ -112,6 +112,9 @@ static void frontbuffer_flush(struct drm_i915_private *i915, > > void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, > > unsigned frontbuffer_bits) > > { > > + if (!HAS_FRONTBUFFER_RENDERING(i915)) > > + return; > > + > > spin_lock(&i915->fb_tracking.lock); > > i915->fb_tracking.flip_bits |= frontbuffer_bits; > > /* Remove stale busy bits due to the old buffer. */ > > @@ -132,6 +135,12 @@ void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, > > void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, > > unsigned frontbuffer_bits) > > { > > + if (!HAS_FRONTBUFFER_RENDERING(i915)) { > > + drm_WARN_ON_ONCE(&i915->drm, i915->fb_tracking.flip_bits | > > + i915->fb_tracking.busy_bits); > > + return; > > + } > > + > > spin_lock(&i915->fb_tracking.lock); > > /* Mask any cancelled flips. */ > > frontbuffer_bits &= i915->fb_tracking.flip_bits; > > @@ -156,6 +165,9 @@ void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, > > void intel_frontbuffer_flip(struct drm_i915_private *i915, > > unsigned frontbuffer_bits) > > { > > + if (!HAS_FRONTBUFFER_RENDERING(i915)) > > + return; > > + > > spin_lock(&i915->fb_tracking.lock); > > /* Remove stale busy bits due to the old buffer. */ > > i915->fb_tracking.busy_bits &= ~frontbuffer_bits; > > @@ -170,6 +182,9 @@ void __intel_fb_invalidate(struct intel_frontbuffer *front, > > { > > struct drm_i915_private *i915 = to_i915(front->obj->base.dev); > > > > + if (!HAS_FRONTBUFFER_RENDERING(i915)) > > + return; > > + > > if (origin == ORIGIN_CS) { > > spin_lock(&i915->fb_tracking.lock); > > i915->fb_tracking.busy_bits |= frontbuffer_bits; > > @@ -191,6 +206,9 @@ void __intel_fb_flush(struct intel_frontbuffer *front, > > { > > struct drm_i915_private *i915 = to_i915(front->obj->base.dev); > > > > + if (!HAS_FRONTBUFFER_RENDERING(i915)) > > + return; > > + > > if (origin == ORIGIN_CS) { > > spin_lock(&i915->fb_tracking.lock); > > /* Filter out new bits since rendering started. */ > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > > index 37c1ca266bcdf..3324ba8d8523c 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.h > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > @@ -1699,6 +1699,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, > > > > #define HAS_ASYNC_FLIPS(i915) (DISPLAY_VER(i915) >= 5) > > > > +#define HAS_FRONTBUFFER_RENDERING(i915) (DISPLAY_VER(i915) < 9) > > + > > /* Only valid when HAS_DISPLAY() is true */ > > #define INTEL_DISPLAY_ENABLED(dev_priv) \ > > (drm_WARN_ON(&(dev_priv)->drm, !HAS_DISPLAY(dev_priv)), !(dev_priv)->params.disable_display) > > -- > > 2.33.0 >
On Thu, Sep 09, 2021 at 08:23:20PM +0000, Souza, Jose wrote: > On Thu, 2021-09-09 at 23:20 +0300, Ville Syrjälä wrote: > > On Thu, Sep 09, 2021 at 12:49:16PM -0700, José Roberto de Souza wrote: > > > By now all the userspace applications should have migrated to atomic > > > or at least be calling DRM_IOCTL_MODE_DIRTYFB. > > > > > > With that we can kill frontbuffer rendering support in i915 for > > > modern platforms. > > > > > > So here converting legacy APIs into atomic commits so it can be > > > properly handled by driver i915. > > > > > > Several IGT tests will fail with this changes, because some tests > > > were stressing those frontbuffer rendering scenarios that no userspace > > > should be using by now, fixes to IGT should be sent soon. > > > > > > > I just gave this a try here and it's unusable. glxgears went from > > 9000 to 120 fps (was expecting 60fps tbh, not sure why I get > > double), everything lags like mad, if I drag a window around > > glxgears/other windows stop updating entirely, etc. NAK > > Can you share your setup? What GPU? Desktop environment? Mesa version? resolutions of sinks? > Will try it in my end. Doesn't really matter as long as you don't have a compositor making a mess of things. This machine is a cfl running mate w/ compositor off, and some 1920x1200 display.
On Thu, 2021-09-09 at 23:28 +0300, Ville Syrjälä wrote: > On Thu, Sep 09, 2021 at 08:23:20PM +0000, Souza, Jose wrote: > > On Thu, 2021-09-09 at 23:20 +0300, Ville Syrjälä wrote: > > > On Thu, Sep 09, 2021 at 12:49:16PM -0700, José Roberto de Souza wrote: > > > > By now all the userspace applications should have migrated to atomic > > > > or at least be calling DRM_IOCTL_MODE_DIRTYFB. > > > > > > > > With that we can kill frontbuffer rendering support in i915 for > > > > modern platforms. > > > > > > > > So here converting legacy APIs into atomic commits so it can be > > > > properly handled by driver i915. > > > > > > > > Several IGT tests will fail with this changes, because some tests > > > > were stressing those frontbuffer rendering scenarios that no userspace > > > > should be using by now, fixes to IGT should be sent soon. > > > > > > > > > > I just gave this a try here and it's unusable. glxgears went from > > > 9000 to 120 fps (was expecting 60fps tbh, not sure why I get > > > double), everything lags like mad, if I drag a window around > > > glxgears/other windows stop updating entirely, etc. NAK > > > > Can you share your setup? What GPU? Desktop environment? Mesa version? resolutions of sinks? > > Will try it in my end. > > Doesn't really matter as long as you don't have a compositor making a > mess of things. This machine is a cfl running mate w/ compositor off, > and some 1920x1200 display. > Making drm_atomic_helper_dirtyfb() do a non-blocking atomic commit makes user experience pretty similar to the one with compositing enabled: drm_atomic_commit() + compositor off: https://youtu.be/NBt6smXs99U drm_atomic_nonblocking_commit() + compositor off: https://youtu.be/QiMhkeGX_L8 drm_atomic_nonblocking_commit() + compositor on: https://youtu.be/KdpJyJ5k6sQ I do not completly agree with the comment in drm_atomic_helper_dirtyfb() about why it uses a blocking implementation. With frontbuffer rendering the registers are programmed but the content could only show up for user a whole frame later. Perhaps if this solutions is accetable we could have a non-blocking version of drm_atomic_helper_dirtyfb() so the drivers current using it don't have their behavior changed.
On Mon, Sep 13, 2021 at 10:54:14PM +0000, Souza, Jose wrote: > On Thu, 2021-09-09 at 23:28 +0300, Ville Syrjälä wrote: > > On Thu, Sep 09, 2021 at 08:23:20PM +0000, Souza, Jose wrote: > > > On Thu, 2021-09-09 at 23:20 +0300, Ville Syrjälä wrote: > > > > On Thu, Sep 09, 2021 at 12:49:16PM -0700, José Roberto de Souza wrote: > > > > > By now all the userspace applications should have migrated to atomic > > > > > or at least be calling DRM_IOCTL_MODE_DIRTYFB. > > > > > > > > > > With that we can kill frontbuffer rendering support in i915 for > > > > > modern platforms. > > > > > > > > > > So here converting legacy APIs into atomic commits so it can be > > > > > properly handled by driver i915. > > > > > > > > > > Several IGT tests will fail with this changes, because some tests > > > > > were stressing those frontbuffer rendering scenarios that no userspace > > > > > should be using by now, fixes to IGT should be sent soon. > > > > > > > > > > > > > I just gave this a try here and it's unusable. glxgears went from > > > > 9000 to 120 fps (was expecting 60fps tbh, not sure why I get > > > > double), everything lags like mad, if I drag a window around > > > > glxgears/other windows stop updating entirely, etc. NAK > > > > > > Can you share your setup? What GPU? Desktop environment? Mesa version? resolutions of sinks? > > > Will try it in my end. > > > > Doesn't really matter as long as you don't have a compositor making a > > mess of things. This machine is a cfl running mate w/ compositor off, > > and some 1920x1200 display. > > > > Making drm_atomic_helper_dirtyfb() do a non-blocking atomic commit makes user experience pretty similar to the one with compositing enabled: > drm_atomic_commit() + compositor off: https://youtu.be/NBt6smXs99U > drm_atomic_nonblocking_commit() + compositor off: https://youtu.be/QiMhkeGX_L8 > drm_atomic_nonblocking_commit() + compositor on: https://youtu.be/KdpJyJ5k6sQ > > > I do not completly agree with the comment in drm_atomic_helper_dirtyfb() about why it uses a blocking implementation. > With frontbuffer rendering the registers are programmed but the content could only show up for user a whole frame later. > > Perhaps if this solutions is accetable we could have a non-blocking version of drm_atomic_helper_dirtyfb() so the drivers current using it don't have > their behavior changed. Non-blocking update would make sense to me, whereas a blocking update makes no sense given how this is used by actual userspace. But this still changes the whole approach to eg. FBC nukes, so it's going to require some actual thought and testing to make sure it actually works as intended. I think by just essentially page flipping to the same buffer we're going to depend on semi-undefined behaviour for FBC. So I'm a bit worried that this is going to introduce new bugs. I have verified on ancient platforms that FBC does in fact flip nuke when flipping to the same buffer (and now we even (ab)use that fact in the frontbuffer tracking code), but I've not done that for any new platforms. The other concern is that introducing additional atomic commits this might trip up the page flip->-EBUSY stuff. Whether that is going to cause any real grief to userspace is unknown at this time. I guess we'd have to come up with a test that actually hits that and make sure the ddx doesn't get confused and instead just gracefully falls back to the blit path.
On Wed, Sep 15, 2021 at 04:48:49PM +0300, Ville Syrjälä wrote: > On Mon, Sep 13, 2021 at 10:54:14PM +0000, Souza, Jose wrote: > > On Thu, 2021-09-09 at 23:28 +0300, Ville Syrjälä wrote: > > > On Thu, Sep 09, 2021 at 08:23:20PM +0000, Souza, Jose wrote: > > > > On Thu, 2021-09-09 at 23:20 +0300, Ville Syrjälä wrote: > > > > > On Thu, Sep 09, 2021 at 12:49:16PM -0700, José Roberto de Souza wrote: > > > > > > By now all the userspace applications should have migrated to atomic > > > > > > or at least be calling DRM_IOCTL_MODE_DIRTYFB. > > > > > > > > > > > > With that we can kill frontbuffer rendering support in i915 for > > > > > > modern platforms. > > > > > > > > > > > > So here converting legacy APIs into atomic commits so it can be > > > > > > properly handled by driver i915. > > > > > > > > > > > > Several IGT tests will fail with this changes, because some tests > > > > > > were stressing those frontbuffer rendering scenarios that no userspace > > > > > > should be using by now, fixes to IGT should be sent soon. > > > > > > > > > > > > > > > > I just gave this a try here and it's unusable. glxgears went from > > > > > 9000 to 120 fps (was expecting 60fps tbh, not sure why I get > > > > > double), everything lags like mad, if I drag a window around > > > > > glxgears/other windows stop updating entirely, etc. NAK > > > > > > > > Can you share your setup? What GPU? Desktop environment? Mesa version? resolutions of sinks? > > > > Will try it in my end. > > > > > > Doesn't really matter as long as you don't have a compositor making a > > > mess of things. This machine is a cfl running mate w/ compositor off, > > > and some 1920x1200 display. > > > > > > > Making drm_atomic_helper_dirtyfb() do a non-blocking atomic commit makes user experience pretty similar to the one with compositing enabled: > > drm_atomic_commit() + compositor off: https://youtu.be/NBt6smXs99U > > drm_atomic_nonblocking_commit() + compositor off: https://youtu.be/QiMhkeGX_L8 > > drm_atomic_nonblocking_commit() + compositor on: https://youtu.be/KdpJyJ5k6sQ > > > > > > I do not completly agree with the comment in drm_atomic_helper_dirtyfb() about why it uses a blocking implementation. > > With frontbuffer rendering the registers are programmed but the content could only show up for user a whole frame later. > > > > Perhaps if this solutions is accetable we could have a non-blocking version of drm_atomic_helper_dirtyfb() so the drivers current using it don't have > > their behavior changed. > > Non-blocking update would make sense to me, whereas a blocking > update makes no sense given how this is used by actual userspace. Actually neither maybe makes total sense since userspace probably isn't expecting -EBUSY from dirtyfb. So we might end up with stale junk on the screen if no further updates come in after an -EBUSY. The current frontbuffer stuff works much more like a mailbox style update so we don't lose stuff and neither do we block.
On Wed, Sep 15, 2021 at 06:57:19PM +0300, Ville Syrjälä wrote: > On Wed, Sep 15, 2021 at 04:48:49PM +0300, Ville Syrjälä wrote: > > On Mon, Sep 13, 2021 at 10:54:14PM +0000, Souza, Jose wrote: > > > On Thu, 2021-09-09 at 23:28 +0300, Ville Syrjälä wrote: > > > > On Thu, Sep 09, 2021 at 08:23:20PM +0000, Souza, Jose wrote: > > > > > On Thu, 2021-09-09 at 23:20 +0300, Ville Syrjälä wrote: > > > > > > On Thu, Sep 09, 2021 at 12:49:16PM -0700, José Roberto de Souza wrote: > > > > > > > By now all the userspace applications should have migrated to atomic > > > > > > > or at least be calling DRM_IOCTL_MODE_DIRTYFB. > > > > > > > > > > > > > > With that we can kill frontbuffer rendering support in i915 for > > > > > > > modern platforms. > > > > > > > > > > > > > > So here converting legacy APIs into atomic commits so it can be > > > > > > > properly handled by driver i915. > > > > > > > > > > > > > > Several IGT tests will fail with this changes, because some tests > > > > > > > were stressing those frontbuffer rendering scenarios that no userspace > > > > > > > should be using by now, fixes to IGT should be sent soon. > > > > > > > > > > > > > > > > > > > I just gave this a try here and it's unusable. glxgears went from > > > > > > 9000 to 120 fps (was expecting 60fps tbh, not sure why I get > > > > > > double), everything lags like mad, if I drag a window around > > > > > > glxgears/other windows stop updating entirely, etc. NAK > > > > > > > > > > Can you share your setup? What GPU? Desktop environment? Mesa version? resolutions of sinks? > > > > > Will try it in my end. > > > > > > > > Doesn't really matter as long as you don't have a compositor making a > > > > mess of things. This machine is a cfl running mate w/ compositor off, > > > > and some 1920x1200 display. > > > > > > > > > > Making drm_atomic_helper_dirtyfb() do a non-blocking atomic commit makes user experience pretty similar to the one with compositing enabled: > > > drm_atomic_commit() + compositor off: https://youtu.be/NBt6smXs99U > > > drm_atomic_nonblocking_commit() + compositor off: https://youtu.be/QiMhkeGX_L8 > > > drm_atomic_nonblocking_commit() + compositor on: https://youtu.be/KdpJyJ5k6sQ > > > > > > > > > I do not completly agree with the comment in drm_atomic_helper_dirtyfb() about why it uses a blocking implementation. > > > With frontbuffer rendering the registers are programmed but the content could only show up for user a whole frame later. > > > > > > Perhaps if this solutions is accetable we could have a non-blocking version of drm_atomic_helper_dirtyfb() so the drivers current using it don't have > > > their behavior changed. > > > > Non-blocking update would make sense to me, whereas a blocking > > update makes no sense given how this is used by actual userspace. > > Actually neither maybe makes total sense since userspace probably > isn't expecting -EBUSY from dirtyfb. So we might end up with stale > junk on the screen if no further updates come in after an -EBUSY. One option would be to teach userspace to retry after an -EBUSY, but without a completion event from dirtyfb it's going to be hard to know when to retry. > > The current frontbuffer stuff works much more like a mailbox style > update so we don't lose stuff and neither do we block. I suppose the obvious solution would be to teach kms to do proper mailbox updates. But that is not entirely trivial. One way to simplify the proposal a bit is to limit it to pure pageflips, which would suffice for dirtyfb as well obviously. That way watermarks and other potentially tricky stuff wouldn't change. But actually figuring out the proper commit sequence for this might take some doing. The way I think it should work is that we just let each commit through without blocking on the previous one. A later commit may simply overwrite the hardware registers before the values written by the previous commit get latched. The vblank evasion stuff would make it perfectly safe. The hardest thing there is probably figuring out how to handle all the pre/post_plane_update() stuff properly since we can't know ahead of time whether the flip gets latched or not, and so we can't really use the old state during state computation to make any decisions affecting our future behaviour. But given that async flips are more or less working today might mean that I'm worried about nothing. Either that or async flips are in fact broken in some funny ways I've not yet realized... The other problem for actual mailbox page flips is completion events/out fences. The current out fence I think is not really suitable for cases where a flip ends up getting overwritten by a later one, and thus the fb rotation no longer follows the normal fifo order. But of we don't expose actual mailbox page flips through the uapi then we could avoid worrying about this stuff initially.
On Wed, 2021-09-15 at 16:48 +0300, Ville Syrjälä wrote: > On Mon, Sep 13, 2021 at 10:54:14PM +0000, Souza, Jose wrote: > > On Thu, 2021-09-09 at 23:28 +0300, Ville Syrjälä wrote: > > > On Thu, Sep 09, 2021 at 08:23:20PM +0000, Souza, Jose wrote: > > > > On Thu, 2021-09-09 at 23:20 +0300, Ville Syrjälä wrote: > > > > > On Thu, Sep 09, 2021 at 12:49:16PM -0700, José Roberto de Souza wrote: > > > > > > By now all the userspace applications should have migrated to atomic > > > > > > or at least be calling DRM_IOCTL_MODE_DIRTYFB. > > > > > > > > > > > > With that we can kill frontbuffer rendering support in i915 for > > > > > > modern platforms. > > > > > > > > > > > > So here converting legacy APIs into atomic commits so it can be > > > > > > properly handled by driver i915. > > > > > > > > > > > > Several IGT tests will fail with this changes, because some tests > > > > > > were stressing those frontbuffer rendering scenarios that no userspace > > > > > > should be using by now, fixes to IGT should be sent soon. > > > > > > > > > > > > > > > > I just gave this a try here and it's unusable. glxgears went from > > > > > 9000 to 120 fps (was expecting 60fps tbh, not sure why I get > > > > > double), everything lags like mad, if I drag a window around > > > > > glxgears/other windows stop updating entirely, etc. NAK > > > > > > > > Can you share your setup? What GPU? Desktop environment? Mesa version? resolutions of sinks? > > > > Will try it in my end. > > > > > > Doesn't really matter as long as you don't have a compositor making a > > > mess of things. This machine is a cfl running mate w/ compositor off, > > > and some 1920x1200 display. > > > > > > > Making drm_atomic_helper_dirtyfb() do a non-blocking atomic commit makes user experience pretty similar to the one with compositing enabled: > > drm_atomic_commit() + compositor off: https://youtu.be/NBt6smXs99U > > drm_atomic_nonblocking_commit() + compositor off: https://youtu.be/QiMhkeGX_L8 > > drm_atomic_nonblocking_commit() + compositor on: https://youtu.be/KdpJyJ5k6sQ > > > > > > I do not completly agree with the comment in drm_atomic_helper_dirtyfb() about why it uses a blocking implementation. > > With frontbuffer rendering the registers are programmed but the content could only show up for user a whole frame later. > > > > Perhaps if this solutions is accetable we could have a non-blocking version of drm_atomic_helper_dirtyfb() so the drivers current using it don't have > > their behavior changed. > > Non-blocking update would make sense to me, whereas a blocking > update makes no sense given how this is used by actual userspace. > > But this still changes the whole approach to eg. FBC nukes, so it's > going to require some actual thought and testing to make sure it > actually works as intended. I think by just essentially page flipping > to the same buffer we're going to depend on semi-undefined behaviour > for FBC. So I'm a bit worried that this is going to introduce new bugs. > I have verified on ancient platforms that FBC does in fact flip nuke > when flipping to the same buffer (and now we even (ab)use that fact > in the frontbuffer tracking code), but I've not done that for any new > platforms. I believe the current fbc tests in kms_frontbuffer_tracking already stress this. For PSR2 hardware tracking in SKL and TGL I have validated(PSR2 is not covered by kms_frontbuffer_tracking due pipe CRC not work with it) this and flipping to the same buffer causes the hardware to properly update the screen. > > The other concern is that introducing additional atomic commits this > might trip up the page flip->-EBUSY stuff. Whether that is going > to cause any real grief to userspace is unknown at this time. I guess > we'd have to come up with a test that actually hits that and make sure > the ddx doesn't get confused and instead just gracefully falls back to > the blit path. >
On Wed, 2021-09-15 at 19:49 +0300, Ville Syrjälä wrote: > On Wed, Sep 15, 2021 at 06:57:19PM +0300, Ville Syrjälä wrote: > > On Wed, Sep 15, 2021 at 04:48:49PM +0300, Ville Syrjälä wrote: > > > On Mon, Sep 13, 2021 at 10:54:14PM +0000, Souza, Jose wrote: > > > > On Thu, 2021-09-09 at 23:28 +0300, Ville Syrjälä wrote: > > > > > On Thu, Sep 09, 2021 at 08:23:20PM +0000, Souza, Jose wrote: > > > > > > On Thu, 2021-09-09 at 23:20 +0300, Ville Syrjälä wrote: > > > > > > > On Thu, Sep 09, 2021 at 12:49:16PM -0700, José Roberto de Souza wrote: > > > > > > > > By now all the userspace applications should have migrated to atomic > > > > > > > > or at least be calling DRM_IOCTL_MODE_DIRTYFB. > > > > > > > > > > > > > > > > With that we can kill frontbuffer rendering support in i915 for > > > > > > > > modern platforms. > > > > > > > > > > > > > > > > So here converting legacy APIs into atomic commits so it can be > > > > > > > > properly handled by driver i915. > > > > > > > > > > > > > > > > Several IGT tests will fail with this changes, because some tests > > > > > > > > were stressing those frontbuffer rendering scenarios that no userspace > > > > > > > > should be using by now, fixes to IGT should be sent soon. > > > > > > > > > > > > > > > > > > > > > > I just gave this a try here and it's unusable. glxgears went from > > > > > > > 9000 to 120 fps (was expecting 60fps tbh, not sure why I get > > > > > > > double), everything lags like mad, if I drag a window around > > > > > > > glxgears/other windows stop updating entirely, etc. NAK > > > > > > > > > > > > Can you share your setup? What GPU? Desktop environment? Mesa version? resolutions of sinks? > > > > > > Will try it in my end. > > > > > > > > > > Doesn't really matter as long as you don't have a compositor making a > > > > > mess of things. This machine is a cfl running mate w/ compositor off, > > > > > and some 1920x1200 display. > > > > > > > > > > > > > Making drm_atomic_helper_dirtyfb() do a non-blocking atomic commit makes user experience pretty similar to the one with compositing enabled: > > > > drm_atomic_commit() + compositor off: https://youtu.be/NBt6smXs99U > > > > drm_atomic_nonblocking_commit() + compositor off: https://youtu.be/QiMhkeGX_L8 > > > > drm_atomic_nonblocking_commit() + compositor on: https://youtu.be/KdpJyJ5k6sQ > > > > > > > > > > > > I do not completly agree with the comment in drm_atomic_helper_dirtyfb() about why it uses a blocking implementation. > > > > With frontbuffer rendering the registers are programmed but the content could only show up for user a whole frame later. > > > > > > > > Perhaps if this solutions is accetable we could have a non-blocking version of drm_atomic_helper_dirtyfb() so the drivers current using it don't have > > > > their behavior changed. > > > > > > Non-blocking update would make sense to me, whereas a blocking > > > update makes no sense given how this is used by actual userspace. > > > > Actually neither maybe makes total sense since userspace probably > > isn't expecting -EBUSY from dirtyfb. So we might end up with stale > > junk on the screen if no further updates come in after an -EBUSY. 1000 dirtyfb calls will only have a different dirty areas, when executing a dirtyfb atomic commit we could append all the pending dirty areas into this atomic commit and remove all the pending dirtyfb atomic commits. I doubt that userspace will be able to fill WQ_MAX_ACTIVE dirtyfb calls in one single frame. > > One option would be to teach userspace to retry after an -EBUSY, but > without a completion event from dirtyfb it's going to be hard to know > when to retry. > > > > > The current frontbuffer stuff works much more like a mailbox style > > update so we don't lose stuff and neither do we block. > > I suppose the obvious solution would be to teach kms to do proper > mailbox updates. But that is not entirely trivial. One way to simplify > the proposal a bit is to limit it to pure pageflips, which would > suffice for dirtyfb as well obviously. That way watermarks and other > potentially tricky stuff wouldn't change. > > But actually figuring out the proper commit sequence for this might > take some doing. The way I think it should work is that we just let > each commit through without blocking on the previous one. A later > commit may simply overwrite the hardware registers before the > values written by the previous commit get latched. The vblank evasion > stuff would make it perfectly safe. > > The hardest thing there is probably figuring out how to handle all > the pre/post_plane_update() stuff properly since we can't know > ahead of time whether the flip gets latched or not, and so we can't > really use the old state during state computation to make any > decisions affecting our future behaviour. But given that async flips > are more or less working today might mean that I'm worried about nothing. > Either that or async flips are in fact broken in some funny ways I've not > yet realized... > > The other problem for actual mailbox page flips is completion events/out > fences. The current out fence I think is not really suitable for cases > where a flip ends up getting overwritten by a later one, and thus the > fb rotation no longer follows the normal fifo order. But of we don't > expose actual mailbox page flips through the uapi then we could avoid > worrying about this stuff initially. >
diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index c7618fef01439..5aa996c3b7980 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -617,6 +617,7 @@ intel_legacy_cursor_update(struct drm_plane *_plane, u32 src_w, u32 src_h, struct drm_modeset_acquire_ctx *ctx) { + struct drm_i915_private *i915 = to_i915(_crtc->dev); struct intel_plane *plane = to_intel_plane(_plane); struct intel_crtc *crtc = to_intel_crtc(_crtc); struct intel_plane_state *old_plane_state = @@ -633,12 +634,9 @@ intel_legacy_cursor_update(struct drm_plane *_plane, * PSR2 selective fetch also requires the slow path as * PSR2 plane and transcoder registers can only be updated during * vblank. - * - * FIXME bigjoiner fastpath would be good */ if (!crtc_state->hw.active || intel_crtc_needs_modeset(crtc_state) || - crtc_state->update_pipe || crtc_state->bigjoiner || - crtc_state->enable_psr2_sel_fetch) + crtc_state->update_pipe || !HAS_FRONTBUFFER_RENDERING(i915)) goto slow; /* diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index e4b8602ec0cd2..3eb60785c9f29 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -3,6 +3,7 @@ * Copyright © 2021 Intel Corporation */ +#include <drm/drm_damage_helper.h> #include <drm/drm_framebuffer.h> #include <drm/drm_modeset_helper.h> @@ -1235,10 +1236,15 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb, unsigned int num_clips) { struct drm_i915_gem_object *obj = intel_fb_obj(fb); + struct drm_i915_private *i915 = to_i915(obj->base.dev); i915_gem_object_flush_if_display(obj); - intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB); + if (!HAS_FRONTBUFFER_RENDERING(i915)) + return drm_atomic_helper_dirtyfb(fb, file, flags, color, clips, + num_clips); + + intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB); return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 0492446cd04ad..3860f87dac31c 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -112,6 +112,9 @@ static void frontbuffer_flush(struct drm_i915_private *i915, void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, unsigned frontbuffer_bits) { + if (!HAS_FRONTBUFFER_RENDERING(i915)) + return; + spin_lock(&i915->fb_tracking.lock); i915->fb_tracking.flip_bits |= frontbuffer_bits; /* Remove stale busy bits due to the old buffer. */ @@ -132,6 +135,12 @@ void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, unsigned frontbuffer_bits) { + if (!HAS_FRONTBUFFER_RENDERING(i915)) { + drm_WARN_ON_ONCE(&i915->drm, i915->fb_tracking.flip_bits | + i915->fb_tracking.busy_bits); + return; + } + spin_lock(&i915->fb_tracking.lock); /* Mask any cancelled flips. */ frontbuffer_bits &= i915->fb_tracking.flip_bits; @@ -156,6 +165,9 @@ void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, void intel_frontbuffer_flip(struct drm_i915_private *i915, unsigned frontbuffer_bits) { + if (!HAS_FRONTBUFFER_RENDERING(i915)) + return; + spin_lock(&i915->fb_tracking.lock); /* Remove stale busy bits due to the old buffer. */ i915->fb_tracking.busy_bits &= ~frontbuffer_bits; @@ -170,6 +182,9 @@ void __intel_fb_invalidate(struct intel_frontbuffer *front, { struct drm_i915_private *i915 = to_i915(front->obj->base.dev); + if (!HAS_FRONTBUFFER_RENDERING(i915)) + return; + if (origin == ORIGIN_CS) { spin_lock(&i915->fb_tracking.lock); i915->fb_tracking.busy_bits |= frontbuffer_bits; @@ -191,6 +206,9 @@ void __intel_fb_flush(struct intel_frontbuffer *front, { struct drm_i915_private *i915 = to_i915(front->obj->base.dev); + if (!HAS_FRONTBUFFER_RENDERING(i915)) + return; + if (origin == ORIGIN_CS) { spin_lock(&i915->fb_tracking.lock); /* Filter out new bits since rendering started. */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 37c1ca266bcdf..3324ba8d8523c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1699,6 +1699,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_ASYNC_FLIPS(i915) (DISPLAY_VER(i915) >= 5) +#define HAS_FRONTBUFFER_RENDERING(i915) (DISPLAY_VER(i915) < 9) + /* Only valid when HAS_DISPLAY() is true */ #define INTEL_DISPLAY_ENABLED(dev_priv) \ (drm_WARN_ON(&(dev_priv)->drm, !HAS_DISPLAY(dev_priv)), !(dev_priv)->params.disable_display)