diff mbox

drm/i915: Serialize GTT Updates on BXT

Message ID 1495476445-23324-1-git-send-email-jon.bloomfield@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Bloomfield, Jon May 22, 2017, 6:07 p.m. UTC
BXT requires accesses to the GTT (i.e. PTE updates) to be serialized
when IOMMU is enabled. This patch guarantees this by wrapping all
updates in stop_machine and using a flushing read to guarantee that
the GTT writes have reached their destination before restarting.

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Signed-off-by: John Harrison <john.C.Harrison@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 106 ++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)

Comments

Chris Wilson May 22, 2017, 8:05 p.m. UTC | #1
On Mon, May 22, 2017 at 11:07:25AM -0700, Jon Bloomfield wrote:
> BXT requires accesses to the GTT (i.e. PTE updates) to be serialized
> when IOMMU is enabled.

Serialised with what, since all writes are serialized already?

The reason is that you need to explain the hw model you are protecting,
for example do clears need to be protected?

> This patch guarantees this by wrapping all
> updates in stop_machine and using a flushing read to guarantee that
> the GTT writes have reached their destination before restarting.

If you mention which patch you are reinstating (for a new problem) and
cc the author, he might point out what has changed in the meantime.

Note, the flush here is not about ensuring the GTT writes reach their
destination.
 
> Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>

If you are the author and sender, what is John's s-o-b doing afterwards?

> Signed-off-by: John Harrison <john.C.Harrison@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 106 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 106 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7c769d7..6360d92 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2191,6 +2191,100 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
>  		gen8_set_pte(&gtt_base[i], scratch_pte);
>  }
>  
> +#ifdef CONFIG_INTEL_IOMMU
> +struct insert_page {
> +	struct i915_address_space *vm;
> +	dma_addr_t addr;
> +	u64 offset;
> +	enum i915_cache_level level;
> +};
> +
> +static int gen8_ggtt_insert_page__cb(void *_arg)
> +{
> +	struct insert_page *arg = _arg;
> +
> +	struct drm_i915_private *dev_priv = arg->vm->i915;
> +
> +	gen8_ggtt_insert_page(arg->vm, arg->addr,
> +				arg->offset, arg->level, 0);
> +
> +	POSTING_READ(GFX_FLSH_CNTL_GEN6);

This is now just a call to i915_ggtt_invalidate() because we are now
also responsible for invalidating the guc tlbs as well as the chipset.
And more importantly it is already done by gen8_ggtt_insert_page.

All the POSTING_READ(GFX_FLSH_CNTL_GEN6) are spurious.

>  static void gen6_ggtt_clear_range(struct i915_address_space *vm,
>  				  u64 start, u64 length)
>  {
> @@ -2789,6 +2883,18 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>  
>  	ggtt->base.insert_entries = gen8_ggtt_insert_entries;
>  
> +#ifdef CONFIG_INTEL_IOMMU
> +	/* Serialize GTT updates on BXT if VT-d is on. */
> +	if (IS_BROXTON(dev_priv) && intel_iommu_gfx_mapped) {

Move to a header and don't ifdef out the users. A small cost in object
side for the benefit of keeping these ifdef out of code.
-Chris
Bloomfield, Jon May 22, 2017, 10:18 p.m. UTC | #2
> -----Original Message-----
> From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> Sent: Monday, May 22, 2017 1:05 PM
> To: Bloomfield, Jon <jon.bloomfield@intel.com>
> Cc: intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH] drm/i915: Serialize GTT Updates on BXT
> 
> On Mon, May 22, 2017 at 11:07:25AM -0700, Jon Bloomfield wrote:
> > BXT requires accesses to the GTT (i.e. PTE updates) to be serialized
> > when IOMMU is enabled.
> 
> Serialised with what, since all writes are serialized already?

Fair cop guv. I'll reword.

> 
> The reason is that you need to explain the hw model you are protecting, for
> example do clears need to be protected?
> 
> > This patch guarantees this by wrapping all updates in stop_machine and
> > using a flushing read to guarantee that the GTT writes have reached
> > their destination before restarting.
> 
> If you mention which patch you are reinstating (for a new problem) and cc
> the author, he might point out what has changed in the meantime.

I don't understand. I'm not re-instating any patches to my knowledge, so it's a bit hard to cc the author.

> 
> Note, the flush here is not about ensuring the GTT writes reach their
> destination.
> 
> > Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
> 
> If you are the author and sender, what is John's s-o-b doing afterwards?
This patch was previously signed off by John.

> 
> > Signed-off-by: John Harrison <john.C.Harrison@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 106
> > ++++++++++++++++++++++++++++++++++++
> >  1 file changed, 106 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 7c769d7..6360d92 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -2191,6 +2191,100 @@ static void gen8_ggtt_clear_range(struct
> i915_address_space *vm,
> >  		gen8_set_pte(&gtt_base[i], scratch_pte);  }
> >
> > +#ifdef CONFIG_INTEL_IOMMU
> > +struct insert_page {
> > +	struct i915_address_space *vm;
> > +	dma_addr_t addr;
> > +	u64 offset;
> > +	enum i915_cache_level level;
> > +};
> > +
> > +static int gen8_ggtt_insert_page__cb(void *_arg) {
> > +	struct insert_page *arg = _arg;
> > +
> > +	struct drm_i915_private *dev_priv = arg->vm->i915;
> > +
> > +	gen8_ggtt_insert_page(arg->vm, arg->addr,
> > +				arg->offset, arg->level, 0);
> > +
> > +	POSTING_READ(GFX_FLSH_CNTL_GEN6);
> 
> This is now just a call to i915_ggtt_invalidate() because we are now also
> responsible for invalidating the guc tlbs as well as the chipset.
> And more importantly it is already done by gen8_ggtt_insert_page.
> 
> All the POSTING_READ(GFX_FLSH_CNTL_GEN6) are spurious.

Are you sure - The purpose of the register read is to ensure that all the PTE writes are flushed from the h/w queue
before we restart the machine. It is critical that all the PTE writes have left this queue before any other accesses are
allowed to begin.
Isn't the invalidate a posted write ? If so, it won't drain the queue.
Even if the invalidate is guaranteed to effect this pipeline flish, the clear_page path doesn't call invalidate, so it's
certainly required there.

> 
> >  static void gen6_ggtt_clear_range(struct i915_address_space *vm,
> >  				  u64 start, u64 length)
> >  {
> > @@ -2789,6 +2883,18 @@ static int gen8_gmch_probe(struct i915_ggtt
> > *ggtt)
> >
> >  	ggtt->base.insert_entries = gen8_ggtt_insert_entries;
> >
> > +#ifdef CONFIG_INTEL_IOMMU
> > +	/* Serialize GTT updates on BXT if VT-d is on. */
> > +	if (IS_BROXTON(dev_priv) && intel_iommu_gfx_mapped) {
> 
> Move to a header and don't ifdef out the users. A small cost in object side for
> the benefit of keeping these ifdef out of code.
Move what to a header ? You mean create a macro for the test, the whole block, or something else ?

I was following the pattern used elsewhere in the code in the vain hope that by following established
convention we might avoid bike-shedding. Every single use of intel_iommu_gfx_mapped in this file is protected by
the #ifdef.

> -Chris
> 
> --
> Chris Wilson, Intel Open Source Technology Centre
Chris Wilson May 23, 2017, 7:33 a.m. UTC | #3
On Mon, May 22, 2017 at 10:18:31PM +0000, Bloomfield, Jon wrote:
> > -----Original Message-----
> > From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> > Sent: Monday, May 22, 2017 1:05 PM
> > To: Bloomfield, Jon <jon.bloomfield@intel.com>
> > Cc: intel-gfx@lists.freedesktop.org
> > Subject: Re: [Intel-gfx] [PATCH] drm/i915: Serialize GTT Updates on BXT
> > 
> > On Mon, May 22, 2017 at 11:07:25AM -0700, Jon Bloomfield wrote:
> > > BXT requires accesses to the GTT (i.e. PTE updates) to be serialized
> > > when IOMMU is enabled.
> > 
> > Serialised with what, since all writes are serialized already?
> 
> Fair cop guv. I'll reword.
> 
> > 
> > The reason is that you need to explain the hw model you are protecting, for
> > example do clears need to be protected?
> > 
> > > This patch guarantees this by wrapping all updates in stop_machine and
> > > using a flushing read to guarantee that the GTT writes have reached
> > > their destination before restarting.
> > 
> > If you mention which patch you are reinstating (for a new problem) and cc
> > the author, he might point out what has changed in the meantime.
> 
> I don't understand. I'm not re-instating any patches to my knowledge, so it's a bit hard to cc the author.

Please then review history before submitting *copied* code.

> > Note, the flush here is not about ensuring the GTT writes reach their
> > destination.
> > 
> > > Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
> > 
> > If you are the author and sender, what is John's s-o-b doing afterwards?
> This patch was previously signed off by John.
> 
> > 
> > > Signed-off-by: John Harrison <john.C.Harrison@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_gem_gtt.c | 106
> > > ++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 106 insertions(+)
> > >
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > index 7c769d7..6360d92 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > @@ -2191,6 +2191,100 @@ static void gen8_ggtt_clear_range(struct
> > i915_address_space *vm,
> > >  		gen8_set_pte(&gtt_base[i], scratch_pte);  }
> > >
> > > +#ifdef CONFIG_INTEL_IOMMU
> > > +struct insert_page {
> > > +	struct i915_address_space *vm;
> > > +	dma_addr_t addr;
> > > +	u64 offset;
> > > +	enum i915_cache_level level;
> > > +};
> > > +
> > > +static int gen8_ggtt_insert_page__cb(void *_arg) {
> > > +	struct insert_page *arg = _arg;
> > > +
> > > +	struct drm_i915_private *dev_priv = arg->vm->i915;
> > > +
> > > +	gen8_ggtt_insert_page(arg->vm, arg->addr,
> > > +				arg->offset, arg->level, 0);
> > > +
> > > +	POSTING_READ(GFX_FLSH_CNTL_GEN6);
> > 
> > This is now just a call to i915_ggtt_invalidate() because we are now also
> > responsible for invalidating the guc tlbs as well as the chipset.
> > And more importantly it is already done by gen8_ggtt_insert_page.
> > 
> > All the POSTING_READ(GFX_FLSH_CNTL_GEN6) are spurious.
> 
> Are you sure - The purpose of the register read is to ensure that all the PTE writes are flushed from the h/w queue
> before we restart the machine. It is critical that all the PTE writes have left this queue before any other accesses are
> allowed to begin.
> Isn't the invalidate a posted write ? If so, it won't drain the queue.
> Even if the invalidate is guaranteed to effect this pipeline flish, the clear_page path doesn't call invalidate, so it's
> certainly required there.

It's an uncached mmio write. It is strongly ordered and serial. Besides
if you feel it is wrong, fix the bug at source.

> > >  static void gen6_ggtt_clear_range(struct i915_address_space *vm,
> > >  				  u64 start, u64 length)
> > >  {
> > > @@ -2789,6 +2883,18 @@ static int gen8_gmch_probe(struct i915_ggtt
> > > *ggtt)
> > >
> > >  	ggtt->base.insert_entries = gen8_ggtt_insert_entries;
> > >
> > > +#ifdef CONFIG_INTEL_IOMMU
> > > +	/* Serialize GTT updates on BXT if VT-d is on. */
> > > +	if (IS_BROXTON(dev_priv) && intel_iommu_gfx_mapped) {
> > 
> > Move to a header and don't ifdef out the users. A small cost in object side for
> > the benefit of keeping these ifdef out of code.
> Move what to a header ? You mean create a macro for the test, the whole block, or something else ?
> 
> I was following the pattern used elsewhere in the code in the vain hope that by following established
> convention we might avoid bike-shedding. Every single use of intel_iommu_gfx_mapped in this file is protected by
> the #ifdef.

Otoh, the recent patches adding more intel_iommu_gfx_mapped have avoided
adding ifdefs to the code.
-Chris
Chris Wilson May 23, 2017, 8:40 a.m. UTC | #4
On Mon, May 22, 2017 at 11:07:25AM -0700, Jon Bloomfield wrote:
> BXT requires accesses to the GTT (i.e. PTE updates) to be serialized
> when IOMMU is enabled. This patch guarantees this by wrapping all
> updates in stop_machine and using a flushing read to guarantee that
> the GTT writes have reached their destination before restarting.
> 

Testcase? igt/gem_concurrent_blit shows the failure.
-Chris
Bloomfield, Jon May 23, 2017, 2:28 p.m. UTC | #5
> -----Original Message-----
> From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> Sent: Tuesday, May 23, 2017 12:33 AM
> To: Bloomfield, Jon <jon.bloomfield@intel.com>
> Cc: intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH] drm/i915: Serialize GTT Updates on BXT
> 
> On Mon, May 22, 2017 at 10:18:31PM +0000, Bloomfield, Jon wrote:
> > > -----Original Message-----
> > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> > > Sent: Monday, May 22, 2017 1:05 PM
> > > To: Bloomfield, Jon <jon.bloomfield@intel.com>
> > > Cc: intel-gfx@lists.freedesktop.org
> > > Subject: Re: [Intel-gfx] [PATCH] drm/i915: Serialize GTT Updates on BXT
> > >
> > > On Mon, May 22, 2017 at 11:07:25AM -0700, Jon Bloomfield wrote:
> > > > BXT requires accesses to the GTT (i.e. PTE updates) to be serialized
> > > > when IOMMU is enabled.
> > >
> > > Serialised with what, since all writes are serialized already?
> >
> > Fair cop guv. I'll reword.
> >
> > >
> > > The reason is that you need to explain the hw model you are protecting,
> for
> > > example do clears need to be protected?
> > >
> > > > This patch guarantees this by wrapping all updates in stop_machine and
> > > > using a flushing read to guarantee that the GTT writes have reached
> > > > their destination before restarting.
> > >
> > > If you mention which patch you are reinstating (for a new problem) and
> cc
> > > the author, he might point out what has changed in the meantime.
> >
> > I don't understand. I'm not re-instating any patches to my knowledge, so
> it's a bit hard to cc the author.
> 
> Please then review history before submitting *copied* code.
> 
> > > Note, the flush here is not about ensuring the GTT writes reach their
> > > destination.
> > >
> > > > Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
> > >
> > > If you are the author and sender, what is John's s-o-b doing afterwards?
> > This patch was previously signed off by John.
> >
> > >
> > > > Signed-off-by: John Harrison <john.C.Harrison@intel.com>
> > > > ---
> > > >  drivers/gpu/drm/i915/i915_gem_gtt.c | 106
> > > > ++++++++++++++++++++++++++++++++++++
> > > >  1 file changed, 106 insertions(+)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > > b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > > index 7c769d7..6360d92 100644
> > > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > > @@ -2191,6 +2191,100 @@ static void gen8_ggtt_clear_range(struct
> > > i915_address_space *vm,
> > > >  		gen8_set_pte(&gtt_base[i], scratch_pte);  }
> > > >
> > > > +#ifdef CONFIG_INTEL_IOMMU
> > > > +struct insert_page {
> > > > +	struct i915_address_space *vm;
> > > > +	dma_addr_t addr;
> > > > +	u64 offset;
> > > > +	enum i915_cache_level level;
> > > > +};
> > > > +
> > > > +static int gen8_ggtt_insert_page__cb(void *_arg) {
> > > > +	struct insert_page *arg = _arg;
> > > > +
> > > > +	struct drm_i915_private *dev_priv = arg->vm->i915;
> > > > +
> > > > +	gen8_ggtt_insert_page(arg->vm, arg->addr,
> > > > +				arg->offset, arg->level, 0);
> > > > +
> > > > +	POSTING_READ(GFX_FLSH_CNTL_GEN6);
> > >
> > > This is now just a call to i915_ggtt_invalidate() because we are now also
> > > responsible for invalidating the guc tlbs as well as the chipset.
> > > And more importantly it is already done by gen8_ggtt_insert_page.
> > >
> > > All the POSTING_READ(GFX_FLSH_CNTL_GEN6) are spurious.
> >
> > Are you sure - The purpose of the register read is to ensure that all the PTE
> writes are flushed from the h/w queue
> > before we restart the machine. It is critical that all the PTE writes have left
> this queue before any other accesses are
> > allowed to begin.
> > Isn't the invalidate a posted write ? If so, it won't drain the queue.
> > Even if the invalidate is guaranteed to effect this pipeline flish, the
> clear_page path doesn't call invalidate, so it's
> > certainly required there.
> 
> It's an uncached mmio write. It is strongly ordered and serial. Besides
> if you feel it is wrong, fix the bug at source.

Strongly ordered is not enough, nor is being uncached - that just ensures the PTE
writes have left the CPU. We need to ensure they have left the GAM before we
allow any following Aperture accesses to reach the GAM. On the other hand it
may be that the write to the flushctl reg will explicitly cause the h/w to clear the
fifo. I'll check with hw.

Re fixing at source: Assuming you mean just put the read into the gtt functions 
next to the invalidate, then I can do that, but it would mean either another 
bxt/iommu test or executing the read unconditionally for all gens. Are you happy
with that, and if so which ?

> 
> > > >  static void gen6_ggtt_clear_range(struct i915_address_space *vm,
> > > >  				  u64 start, u64 length)
> > > >  {
> > > > @@ -2789,6 +2883,18 @@ static int gen8_gmch_probe(struct i915_ggtt
> > > > *ggtt)
> > > >
> > > >  	ggtt->base.insert_entries = gen8_ggtt_insert_entries;
> > > >
> > > > +#ifdef CONFIG_INTEL_IOMMU
> > > > +	/* Serialize GTT updates on BXT if VT-d is on. */
> > > > +	if (IS_BROXTON(dev_priv) && intel_iommu_gfx_mapped) {
> > >
> > > Move to a header and don't ifdef out the users. A small cost in object side
> for
> > > the benefit of keeping these ifdef out of code.
> > Move what to a header ? You mean create a macro for the test, the whole
> block, or something else ?
> >
> > I was following the pattern used elsewhere in the code in the vain hope
> that by following established
> > convention we might avoid bike-shedding. Every single use of
> intel_iommu_gfx_mapped in this file is protected by
> > the #ifdef.
> 
> Otoh, the recent patches adding more intel_iommu_gfx_mapped have
> avoided
> adding ifdefs to the code.

I've searched the i915 directory on drm-intel-nightly and not found one instance
of intel_iommu_gfx_mapped that is outside a #ifdef. But I'm happy to make my
patch the only one.

> -Chris
> 
> --
> Chris Wilson, Intel Open Source Technology Centre
Bloomfield, Jon May 23, 2017, 2:35 p.m. UTC | #6
> -----Original Message-----
> From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> Sent: Tuesday, May 23, 2017 1:41 AM
> To: Bloomfield, Jon <jon.bloomfield@intel.com>
> Cc: intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH] drm/i915: Serialize GTT Updates on BXT
> 
> On Mon, May 22, 2017 at 11:07:25AM -0700, Jon Bloomfield wrote:
> > BXT requires accesses to the GTT (i.e. PTE updates) to be serialized
> > when IOMMU is enabled. This patch guarantees this by wrapping all
> > updates in stop_machine and using a flushing read to guarantee that
> > the GTT writes have reached their destination before restarting.
> >
> 
> Testcase? igt/gem_concurrent_blit shows the failure.

I was using a combination of tests, run in parallel to hit this bug. I need to get hold of
a system again to re-run. Are you saying you have also repro'd the bug just with
gem_concurrent_blit or just suggesting that it might be a good candidate ?

I'll also re-try without the flushing read, but I'm wary of removing this unless I can
understand why the mmio write has the same effect. It might be luck.

> -Chris
> 
> --
> Chris Wilson, Intel Open Source Technology Centre
Bloomfield, Jon May 23, 2017, 11:44 p.m. UTC | #7
> -----Original Message-----
> From: Bloomfield, Jon
> Sent: Tuesday, May 23, 2017 7:28 AM
> To: 'Chris Wilson' <chris@chris-wilson.co.uk>
> Cc: intel-gfx@lists.freedesktop.org
> Subject: RE: [Intel-gfx] [PATCH] drm/i915: Serialize GTT Updates on BXT
> 

> > -----Original Message-----
> > From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> > Sent: Tuesday, May 23, 2017 12:33 AM
> > To: Bloomfield, Jon <jon.bloomfield@intel.com>
> > Cc: intel-gfx@lists.freedesktop.org
> > Subject: Re: [Intel-gfx] [PATCH] drm/i915: Serialize GTT Updates on BXT
> > It's an uncached mmio write. It is strongly ordered and serial. Besides
> > if you feel it is wrong, fix the bug at source.
> 
> Strongly ordered is not enough, nor is being uncached - that just ensures the
> PTE
> writes have left the CPU. We need to ensure they have left the GAM before
> we
> allow any following Aperture accesses to reach the GAM. On the other hand
> it
> may be that the write to the flushctl reg will explicitly cause the h/w to clear
> the
> fifo. I'll check with hw.

H/W have confirmed that the flushing write is not sufficient to avoid the bug. 
In their words [almost]:

	"You need the read.  The [FLSH_CTRL write] will invalidate the GTLB.  
	 However, it won't ensure a [Aperture] read isn't in the pipe."

So the mmio read needs to stay, and we're left with where to issue it. I placed 
it in the _cb function because it is only needed for BXT and doing it there 
avoids any (admittedly small) extra overhead for other devices. But I have 
no strong opinion on this, so if you really want it to go into the main function
I'll move it. I do think it should be conditional though.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7c769d7..6360d92 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2191,6 +2191,100 @@  static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 		gen8_set_pte(&gtt_base[i], scratch_pte);
 }
 
+#ifdef CONFIG_INTEL_IOMMU
+struct insert_page {
+	struct i915_address_space *vm;
+	dma_addr_t addr;
+	u64 offset;
+	enum i915_cache_level level;
+};
+
+static int gen8_ggtt_insert_page__cb(void *_arg)
+{
+	struct insert_page *arg = _arg;
+
+	struct drm_i915_private *dev_priv = arg->vm->i915;
+
+	gen8_ggtt_insert_page(arg->vm, arg->addr,
+				arg->offset, arg->level, 0);
+
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+
+	return 0;
+}
+
+static void gen8_ggtt_insert_page__BKL(struct i915_address_space *vm,
+				       dma_addr_t addr,
+				       u64 offset,
+				       enum i915_cache_level level,
+				       u32 unused)
+{
+	struct insert_page arg = { vm, addr, offset, level };
+
+	stop_machine(gen8_ggtt_insert_page__cb, &arg, NULL);
+}
+
+
+struct insert_entries {
+	struct i915_address_space *vm;
+	struct sg_table *st;
+	u64 start;
+	enum i915_cache_level level;
+};
+
+static int gen8_ggtt_insert_entries__cb(void *_arg)
+{
+	struct insert_entries *arg = _arg;
+
+	struct drm_i915_private *dev_priv = arg->vm->i915;
+
+	gen8_ggtt_insert_entries(arg->vm, arg->st,
+					arg->start, arg->level, 0);
+
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+
+	return 0;
+}
+
+static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
+					  struct sg_table *st,
+					  u64 start,
+					  enum i915_cache_level level,
+					  u32 unused)
+{
+	struct insert_entries arg = { vm, st, start, level };
+
+	stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
+}
+
+struct clear_range {
+	struct i915_address_space *vm;
+	u64 start;
+	u64 length;
+};
+
+static int gen8_ggtt_clear_range__cb(void *_arg)
+{
+	struct clear_range *arg = _arg;
+
+	struct drm_i915_private *dev_priv = arg->vm->i915;
+
+	gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
+
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+
+	return 0;
+}
+
+static void gen8_ggtt_clear_range__BKL(struct i915_address_space *vm,
+                                       u64 start,
+                                       u64 length)
+{
+	struct clear_range arg = { vm, start, length };
+	stop_machine(gen8_ggtt_clear_range__cb, &arg, NULL);
+}
+#endif
+
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				  u64 start, u64 length)
 {
@@ -2789,6 +2883,18 @@  static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
 	ggtt->base.insert_entries = gen8_ggtt_insert_entries;
 
+#ifdef CONFIG_INTEL_IOMMU
+	/* Serialize GTT updates on BXT if VT-d is on. */
+	if (IS_BROXTON(dev_priv) && intel_iommu_gfx_mapped) {
+		ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL;
+		ggtt->base.insert_page    = gen8_ggtt_insert_page__BKL;
+		if (!USES_FULL_PPGTT(dev_priv) ||
+		    intel_scanout_needs_vtd_wa(dev_priv)) {
+			ggtt->base.clear_range = gen8_ggtt_clear_range__BKL;
+		}
+	}
+#endif
+
 	ggtt->invalidate = gen6_ggtt_invalidate;
 
 	return ggtt_probe_common(ggtt, size);