diff mbox

[3/4] drm/i915: add workaround to gen7_render_ring_flush

Message ID 1345239344-9968-4-git-send-email-przanoni@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paulo Zanoni Aug. 17, 2012, 9:35 p.m. UTC
From: Paulo Zanoni <paulo.r.zanoni@intel.com>

The combination of this commit + the next one will prevent a lot of
gpu hangs.

Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

Comments

Daniel Vetter Aug. 28, 2012, 9:17 a.m. UTC | #1
On Fri, Aug 17, 2012 at 06:35:43PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
> 
> The combination of this commit + the next one will prevent a lot of
> gpu hangs.
> 
> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

tbh I'm not happy with the justification in the commit message here. If
there's anything about this in Bspec, I want a full reference, if this is
just due to the simulator, please say so (and maybe paste the complaint
fulsim raises). If it's just empirical evidence please say which machines
this affects (since afaict it's only confirmed to help on hsw).

[Also directed at Ben.]

Thanks, Daniel
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 24 ++++++++++++++++++++++++
>  1 file changed, 24 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index dc5272b..9895a6e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -263,6 +263,25 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
>  }
>  
>  static int
> +gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
> +{
> +	int ret;
> +
> +	ret = intel_ring_begin(ring, 4);
> +	if (ret)
> +		return ret;
> +
> +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
> +	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
> +			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
> +	intel_ring_emit(ring, 0);
> +	intel_ring_emit(ring, 0);
> +	intel_ring_advance(ring);
> +
> +	return 0;
> +}
> +
> +static int
>  gen7_render_ring_flush(struct intel_ring_buffer *ring,
>  		       u32 invalidate_domains, u32 flush_domains)
>  {
> @@ -295,6 +314,11 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
>  		 * TLB invalidate requires a post-sync write.
>  		 */
>  		flags |= PIPE_CONTROL_QW_WRITE;
> +
> +		/* Workaround: we must issue a pipe_control with CS-stall bit
> +		 * set before a pipe_control command that has the state cache
> +		 * invalidate bit set. */
> +		gen7_render_ring_cs_stall_wa(ring);
>  	}
>  
>  	ret = intel_ring_begin(ring, 4);
> -- 
> 1.7.11.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Paulo Zanoni Aug. 28, 2012, 1:10 p.m. UTC | #2
2012/8/28 Daniel Vetter <daniel@ffwll.ch>:
> On Fri, Aug 17, 2012 at 06:35:43PM -0300, Paulo Zanoni wrote:
>> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>
>> The combination of this commit + the next one will prevent a lot of
>> gpu hangs.
>>
>> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
>
> tbh I'm not happy with the justification in the commit message here. If
> there's anything about this in Bspec, I want a full reference, if this is
> just due to the simulator, please say so (and maybe paste the complaint
> fulsim raises). If it's just empirical evidence please say which machines
> this affects (since afaict it's only confirmed to help on hsw).

It's from Bspec. See the page that describes the PIPE_CONTROL
register. Look for a piece of text that's very similar to the comment
I wrote. It affects IVB and newer.

This is a regression. If you're not happy with this solution, you can
revert all the commits that happened in gen6_render_ring_flush since
we stopped applying the gen6 workarounds to IVB and newer. This
regression makes Haswell a *pain* to use: you're typing on the
terminal and then suddenly everything freezes because there's a GPU
hang. Then 5 seconds later it happens again. Then 2 minutes later,
again. Then again.

Cheers,
Paulo

>
> [Also directed at Ben.]
>
> Thanks, Daniel
>> ---
>>  drivers/gpu/drm/i915/intel_ringbuffer.c | 24 ++++++++++++++++++++++++
>>  1 file changed, 24 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index dc5272b..9895a6e 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -263,6 +263,25 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
>>  }
>>
>>  static int
>> +gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
>> +{
>> +     int ret;
>> +
>> +     ret = intel_ring_begin(ring, 4);
>> +     if (ret)
>> +             return ret;
>> +
>> +     intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
>> +     intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
>> +                           PIPE_CONTROL_STALL_AT_SCOREBOARD);
>> +     intel_ring_emit(ring, 0);
>> +     intel_ring_emit(ring, 0);
>> +     intel_ring_advance(ring);
>> +
>> +     return 0;
>> +}
>> +
>> +static int
>>  gen7_render_ring_flush(struct intel_ring_buffer *ring,
>>                      u32 invalidate_domains, u32 flush_domains)
>>  {
>> @@ -295,6 +314,11 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
>>                * TLB invalidate requires a post-sync write.
>>                */
>>               flags |= PIPE_CONTROL_QW_WRITE;
>> +
>> +             /* Workaround: we must issue a pipe_control with CS-stall bit
>> +              * set before a pipe_control command that has the state cache
>> +              * invalidate bit set. */
>> +             gen7_render_ring_cs_stall_wa(ring);
>>       }
>>
>>       ret = intel_ring_begin(ring, 4);
>> --
>> 1.7.11.2
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> --
> Daniel Vetter
> Mail: daniel@ffwll.ch
> Mobile: +41 (0)79 365 57 48
Daniel Vetter Aug. 28, 2012, 3:17 p.m. UTC | #3
On Tue, Aug 28, 2012 at 10:10:13AM -0300, Paulo Zanoni wrote:
> 2012/8/28 Daniel Vetter <daniel@ffwll.ch>:
> > On Fri, Aug 17, 2012 at 06:35:43PM -0300, Paulo Zanoni wrote:
> >> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
> >>
> >> The combination of this commit + the next one will prevent a lot of
> >> gpu hangs.
> >>
> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> >
> > tbh I'm not happy with the justification in the commit message here. If
> > there's anything about this in Bspec, I want a full reference, if this is
> > just due to the simulator, please say so (and maybe paste the complaint
> > fulsim raises). If it's just empirical evidence please say which machines
> > this affects (since afaict it's only confirmed to help on hsw).
> 
> It's from Bspec. See the page that describes the PIPE_CONTROL
> register. Look for a piece of text that's very similar to the comment
> I wrote. It affects IVB and newer.
> 
> This is a regression. If you're not happy with this solution, you can
> revert all the commits that happened in gen6_render_ring_flush since
> we stopped applying the gen6 workarounds to IVB and newer. This
> regression makes Haswell a *pain* to use: you're typing on the
> terminal and then suddenly everything freezes because there's a GPU
> hang. Then 5 seconds later it happens again. Then 2 minutes later,
> again. Then again.

Ok, after some flaming in private on irc we've resolved our differences.
I've applied the first 2 patches as-is and squashed the later 2 workaround
patches together, with some bikeshedding applied (mostly to cite the bspec
in the commit message, cite the comment that introduced the regression and
also mentioned that this restores hsw stability).

Thanks for tracking this down and writing the patches.

Yours, Daniel
> 
> Cheers,
> Paulo
> 
> >
> > [Also directed at Ben.]
> >
> > Thanks, Daniel
> >> ---
> >>  drivers/gpu/drm/i915/intel_ringbuffer.c | 24 ++++++++++++++++++++++++
> >>  1 file changed, 24 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >> index dc5272b..9895a6e 100644
> >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >> @@ -263,6 +263,25 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
> >>  }
> >>
> >>  static int
> >> +gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
> >> +{
> >> +     int ret;
> >> +
> >> +     ret = intel_ring_begin(ring, 4);
> >> +     if (ret)
> >> +             return ret;
> >> +
> >> +     intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
> >> +     intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
> >> +                           PIPE_CONTROL_STALL_AT_SCOREBOARD);
> >> +     intel_ring_emit(ring, 0);
> >> +     intel_ring_emit(ring, 0);
> >> +     intel_ring_advance(ring);
> >> +
> >> +     return 0;
> >> +}
> >> +
> >> +static int
> >>  gen7_render_ring_flush(struct intel_ring_buffer *ring,
> >>                      u32 invalidate_domains, u32 flush_domains)
> >>  {
> >> @@ -295,6 +314,11 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
> >>                * TLB invalidate requires a post-sync write.
> >>                */
> >>               flags |= PIPE_CONTROL_QW_WRITE;
> >> +
> >> +             /* Workaround: we must issue a pipe_control with CS-stall bit
> >> +              * set before a pipe_control command that has the state cache
> >> +              * invalidate bit set. */
> >> +             gen7_render_ring_cs_stall_wa(ring);
> >>       }
> >>
> >>       ret = intel_ring_begin(ring, 4);
> >> --
> >> 1.7.11.2
> >>
> >> _______________________________________________
> >> Intel-gfx mailing list
> >> Intel-gfx@lists.freedesktop.org
> >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >
> > --
> > Daniel Vetter
> > Mail: daniel@ffwll.ch
> > Mobile: +41 (0)79 365 57 48
> 
> 
> 
> -- 
> Paulo Zanoni
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index dc5272b..9895a6e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -263,6 +263,25 @@  gen6_render_ring_flush(struct intel_ring_buffer *ring,
 }
 
 static int
+gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
+{
+	int ret;
+
+	ret = intel_ring_begin(ring, 4);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
+			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 0);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int
 gen7_render_ring_flush(struct intel_ring_buffer *ring,
 		       u32 invalidate_domains, u32 flush_domains)
 {
@@ -295,6 +314,11 @@  gen7_render_ring_flush(struct intel_ring_buffer *ring,
 		 * TLB invalidate requires a post-sync write.
 		 */
 		flags |= PIPE_CONTROL_QW_WRITE;
+
+		/* Workaround: we must issue a pipe_control with CS-stall bit
+		 * set before a pipe_control command that has the state cache
+		 * invalidate bit set. */
+		gen7_render_ring_cs_stall_wa(ring);
 	}
 
 	ret = intel_ring_begin(ring, 4);