diff mbox

[3/4] drm/i915: use ORIGIN_CPU for frontbuffer invalidation on WC mmaps

Message ID 1459804638-3588-4-git-send-email-paulo.r.zanoni@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zanoni, Paulo R April 4, 2016, 9:17 p.m. UTC
From: Chris Wilson <chris@chris-wilson.co.uk>

... instead of the previous ORIGIN_GTT. This should actually
invalidate FBC once something is written on the frontbuffer using WC
mmaps. The problem with ORIGIN_GTT is that the automatic hardware
tracking is not able to detect the WC writes as it can detect the GTT
writes.

This should help fix the SKL bug where nothing happens when you type
your username/password on lightdm.

This patch was originally pasted on an email by Chris and converted to
an actual git patch by Paulo.

Cc: <stable@vger.kernel.org> # 4.6
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem.c | 14 +++++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

Comments

Daniel Vetter April 25, 2016, 8:15 a.m. UTC | #1
On Mon, Apr 04, 2016 at 06:17:17PM -0300, Paulo Zanoni wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
> 
> ... instead of the previous ORIGIN_GTT. This should actually
> invalidate FBC once something is written on the frontbuffer using WC
> mmaps. The problem with ORIGIN_GTT is that the automatic hardware
> tracking is not able to detect the WC writes as it can detect the GTT
> writes.
> 
> This should help fix the SKL bug where nothing happens when you type
> your username/password on lightdm.
> 
> This patch was originally pasted on an email by Chris and converted to
> an actual git patch by Paulo.
> 
> Cc: <stable@vger.kernel.org> # 4.6
> Cc: Chris Wilson <chris@chris-wilson.co.uk>

This is From: Chris but lacks his sob. Can't merge as-is, pls ask him for
his sob on irc.

> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  1 +
>  drivers/gpu/drm/i915/i915_gem.c | 14 +++++++++++---
>  2 files changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index dd18772..17b42a8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2157,6 +2157,7 @@ struct drm_i915_gem_object {
>  	unsigned int cache_dirty:1;
>  
>  	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
> +	unsigned int has_wc_mmap:1;
>  
>  	unsigned int pin_display;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 40f90c7..bfafa07 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1608,6 +1608,13 @@ static struct intel_rps_client *to_rps_client(struct drm_file *file)
>  	return &fpriv->rps;
>  }
>  
> +static enum fb_op_origin
> +write_origin(struct drm_i915_gem_object *obj, unsigned domain)
> +{
> +	return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ?
> +	       ORIGIN_GTT : ORIGIN_CPU;
> +}
> +
>  /**
>   * Called when user space prepares to use an object with the CPU, either
>   * through the mmap ioctl's mapping or a GTT mapping.
> @@ -1661,9 +1668,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>  		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
>  
>  	if (write_domain != 0)
> -		intel_fb_obj_invalidate(obj,
> -					write_domain == I915_GEM_DOMAIN_GTT ?
> -					ORIGIN_GTT : ORIGIN_CPU);
> +		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
>  
>  unref:
>  	drm_gem_object_unreference(&obj->base);
> @@ -1761,6 +1766,9 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>  		else
>  			addr = -ENOMEM;
>  		up_write(&mm->mmap_sem);
> +
> +		/* This may race, but that's ok, it only gets set */

This comment doesn't mesh with the

	unsigned int has_wc_mmap:1;

above. If you depend upon atomicity of writes at the hw level, your struct
member _must_ be a full machine word. Which in practice means

	/*
	 * IMPORTANT: We have unlocked access to this, this must be a
	 * stand-alone bool.
	 */
	bool has_wc_mmap;

You can only fold together different bitfields into one if they're _all_
protected by the same lock. gcc needs to generate read-modify-write cycles
to write them, which means if any of them are accessed through a separate
lock or lock-lessly, there's a real race.

With that fixed (and assuming the ABI fix has still Chris' ack/sob):

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> +		to_intel_bo(obj)->has_wc_mmap = true;
>  	}
>  	drm_gem_object_unreference_unlocked(obj);
>  	if (IS_ERR((void *)addr))
> -- 
> 2.8.0.rc3
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson April 25, 2016, 8:20 a.m. UTC | #2
On Mon, Apr 25, 2016 at 10:15:11AM +0200, Daniel Vetter wrote:
> On Mon, Apr 04, 2016 at 06:17:17PM -0300, Paulo Zanoni wrote:
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> > 
> > ... instead of the previous ORIGIN_GTT. This should actually
> > invalidate FBC once something is written on the frontbuffer using WC
> > mmaps. The problem with ORIGIN_GTT is that the automatic hardware
> > tracking is not able to detect the WC writes as it can detect the GTT
> > writes.
> > 
> > This should help fix the SKL bug where nothing happens when you type
> > your username/password on lightdm.
> > 
> > This patch was originally pasted on an email by Chris and converted to
> > an actual git patch by Paulo.
> > 
> > Cc: <stable@vger.kernel.org> # 4.6
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> 
> This is From: Chris but lacks his sob. Can't merge as-is, pls ask him for
> his sob on irc.
> 
> > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h |  1 +
> >  drivers/gpu/drm/i915/i915_gem.c | 14 +++++++++++---
> >  2 files changed, 12 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index dd18772..17b42a8 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2157,6 +2157,7 @@ struct drm_i915_gem_object {
> >  	unsigned int cache_dirty:1;
> >  
> >  	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
> > +	unsigned int has_wc_mmap:1;
> >  
> >  	unsigned int pin_display;
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 40f90c7..bfafa07 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1608,6 +1608,13 @@ static struct intel_rps_client *to_rps_client(struct drm_file *file)
> >  	return &fpriv->rps;
> >  }
> >  
> > +static enum fb_op_origin
> > +write_origin(struct drm_i915_gem_object *obj, unsigned domain)
> > +{
> > +	return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ?
> > +	       ORIGIN_GTT : ORIGIN_CPU;
> > +}
> > +
> >  /**
> >   * Called when user space prepares to use an object with the CPU, either
> >   * through the mmap ioctl's mapping or a GTT mapping.
> > @@ -1661,9 +1668,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
> >  		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
> >  
> >  	if (write_domain != 0)
> > -		intel_fb_obj_invalidate(obj,
> > -					write_domain == I915_GEM_DOMAIN_GTT ?
> > -					ORIGIN_GTT : ORIGIN_CPU);
> > +		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
> >  
> >  unref:
> >  	drm_gem_object_unreference(&obj->base);
> > @@ -1761,6 +1766,9 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
> >  		else
> >  			addr = -ENOMEM;
> >  		up_write(&mm->mmap_sem);
> > +
> > +		/* This may race, but that's ok, it only gets set */
> 
> This comment doesn't mesh with the
> 
> 	unsigned int has_wc_mmap:1;
> 
> above. If you depend upon atomicity of writes at the hw level, your struct
> member _must_ be a full machine word. Which in practice means
> 
> 	/*
> 	 * IMPORTANT: We have unlocked access to this, this must be a
> 	 * stand-alone bool.
> 	 */
> 	bool has_wc_mmap;
> 
> You can only fold together different bitfields into one if they're _all_
> protected by the same lock. gcc needs to generate read-modify-write cycles
> to write them, which means if any of them are accessed through a separate
> lock or lock-lessly, there's a real race.
> 
> With that fixed (and assuming the ABI fix has still Chris' ack/sob):

Yup, we can do better and use WRITE_ONCE() as well. That would make GCC
complain about it being bogus.
-Chris
Daniel Vetter April 25, 2016, 8:27 a.m. UTC | #3
On Mon, Apr 25, 2016 at 09:20:05AM +0100, Chris Wilson wrote:
> On Mon, Apr 25, 2016 at 10:15:11AM +0200, Daniel Vetter wrote:
> > On Mon, Apr 04, 2016 at 06:17:17PM -0300, Paulo Zanoni wrote:
> > > From: Chris Wilson <chris@chris-wilson.co.uk>
> > > 
> > > ... instead of the previous ORIGIN_GTT. This should actually
> > > invalidate FBC once something is written on the frontbuffer using WC
> > > mmaps. The problem with ORIGIN_GTT is that the automatic hardware
> > > tracking is not able to detect the WC writes as it can detect the GTT
> > > writes.
> > > 
> > > This should help fix the SKL bug where nothing happens when you type
> > > your username/password on lightdm.
> > > 
> > > This patch was originally pasted on an email by Chris and converted to
> > > an actual git patch by Paulo.
> > > 
> > > Cc: <stable@vger.kernel.org> # 4.6
> > > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > 
> > This is From: Chris but lacks his sob. Can't merge as-is, pls ask him for
> > his sob on irc.
> > 
> > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_drv.h |  1 +
> > >  drivers/gpu/drm/i915/i915_gem.c | 14 +++++++++++---
> > >  2 files changed, 12 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index dd18772..17b42a8 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -2157,6 +2157,7 @@ struct drm_i915_gem_object {
> > >  	unsigned int cache_dirty:1;
> > >  
> > >  	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
> > > +	unsigned int has_wc_mmap:1;
> > >  
> > >  	unsigned int pin_display;
> > >  
> > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > > index 40f90c7..bfafa07 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > @@ -1608,6 +1608,13 @@ static struct intel_rps_client *to_rps_client(struct drm_file *file)
> > >  	return &fpriv->rps;
> > >  }
> > >  
> > > +static enum fb_op_origin
> > > +write_origin(struct drm_i915_gem_object *obj, unsigned domain)
> > > +{
> > > +	return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ?
> > > +	       ORIGIN_GTT : ORIGIN_CPU;
> > > +}
> > > +
> > >  /**
> > >   * Called when user space prepares to use an object with the CPU, either
> > >   * through the mmap ioctl's mapping or a GTT mapping.
> > > @@ -1661,9 +1668,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
> > >  		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
> > >  
> > >  	if (write_domain != 0)
> > > -		intel_fb_obj_invalidate(obj,
> > > -					write_domain == I915_GEM_DOMAIN_GTT ?
> > > -					ORIGIN_GTT : ORIGIN_CPU);
> > > +		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
> > >  
> > >  unref:
> > >  	drm_gem_object_unreference(&obj->base);
> > > @@ -1761,6 +1766,9 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
> > >  		else
> > >  			addr = -ENOMEM;
> > >  		up_write(&mm->mmap_sem);
> > > +
> > > +		/* This may race, but that's ok, it only gets set */
> > 
> > This comment doesn't mesh with the
> > 
> > 	unsigned int has_wc_mmap:1;
> > 
> > above. If you depend upon atomicity of writes at the hw level, your struct
> > member _must_ be a full machine word. Which in practice means
> > 
> > 	/*
> > 	 * IMPORTANT: We have unlocked access to this, this must be a
> > 	 * stand-alone bool.
> > 	 */
> > 	bool has_wc_mmap;
> > 
> > You can only fold together different bitfields into one if they're _all_
> > protected by the same lock. gcc needs to generate read-modify-write cycles
> > to write them, which means if any of them are accessed through a separate
> > lock or lock-lessly, there's a real race.
> > 
> > With that fixed (and assuming the ABI fix has still Chris' ack/sob):
> 
> Yup, we can do better and use WRITE_ONCE() as well. That would make GCC
> complain about it being bogus.

+1 for WRITE_ONCE(). Great suggestion, totally forgot about it.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dd18772..17b42a8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2157,6 +2157,7 @@  struct drm_i915_gem_object {
 	unsigned int cache_dirty:1;
 
 	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
+	unsigned int has_wc_mmap:1;
 
 	unsigned int pin_display;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 40f90c7..bfafa07 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1608,6 +1608,13 @@  static struct intel_rps_client *to_rps_client(struct drm_file *file)
 	return &fpriv->rps;
 }
 
+static enum fb_op_origin
+write_origin(struct drm_i915_gem_object *obj, unsigned domain)
+{
+	return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ?
+	       ORIGIN_GTT : ORIGIN_CPU;
+}
+
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -1661,9 +1668,7 @@  i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
 
 	if (write_domain != 0)
-		intel_fb_obj_invalidate(obj,
-					write_domain == I915_GEM_DOMAIN_GTT ?
-					ORIGIN_GTT : ORIGIN_CPU);
+		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
 
 unref:
 	drm_gem_object_unreference(&obj->base);
@@ -1761,6 +1766,9 @@  i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 		else
 			addr = -ENOMEM;
 		up_write(&mm->mmap_sem);
+
+		/* This may race, but that's ok, it only gets set */
+		to_intel_bo(obj)->has_wc_mmap = true;
 	}
 	drm_gem_object_unreference_unlocked(obj);
 	if (IS_ERR((void *)addr))