diff mbox series

[1/2] drm/i915/icl: Implement gen11 flush including tile cache

Message ID 20190812160108.16973-1-mika.kuoppala@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [1/2] drm/i915/icl: Implement gen11 flush including tile cache | expand

Commit Message

Mika Kuoppala Aug. 12, 2019, 4:01 p.m. UTC
Add tile cache flushing for gen11. To relive us from the
burden of previous obsolete workarounds, make a dedicated
flush/invalidate callback for gen11.

To fortify an independent single flush, do post
sync op as there are indications that without it
we don't flush everything. This should also make this
callback more readily usable in tgl (see l3 fabric flush).

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  1 +
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 63 +++++++++++++++++++-
 2 files changed, 63 insertions(+), 1 deletion(-)

Comments

Chris Wilson Aug. 12, 2019, 4:16 p.m. UTC | #1
Quoting Mika Kuoppala (2019-08-12 17:01:07)
> Add tile cache flushing for gen11. To relive us from the
> burden of previous obsolete workarounds, make a dedicated
> flush/invalidate callback for gen11.
> 
> To fortify an independent single flush, do post
> sync op as there are indications that without it
> we don't flush everything. This should also make this
> callback more readily usable in tgl (see l3 fabric flush).
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  1 +
>  drivers/gpu/drm/i915/gt/intel_lrc.c          | 63 +++++++++++++++++++-
>  2 files changed, 63 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index 6a0879c27d14..929a17e54f2c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -208,6 +208,7 @@
>  #define   DISPLAY_PLANE_A           (0<<20)
>  #define   DISPLAY_PLANE_B           (1<<20)
>  #define GFX_OP_PIPE_CONTROL(len)       ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
> +#define   PIPE_CONTROL_TILE_CACHE_FLUSH                        (1<<28) /* gen11+ */
Ack.

>  #define   PIPE_CONTROL_FLUSH_L3                                (1<<27)
>  #define   PIPE_CONTROL_GLOBAL_GTT_IVB                  (1<<24) /* gen7+ */
>  #define   PIPE_CONTROL_MMIO_WRITE                      (1<<23)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index b97047d58d3d..891db3c933c9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2655,6 +2655,64 @@ static int gen8_emit_flush_render(struct i915_request *request,
>         return 0;
>  }
>  
> +static int gen11_emit_flush_render(struct i915_request *request,
> +                                  u32 mode)
> +{
> +       struct intel_engine_cs *engine = request->engine;
> +       const u32 scratch_addr =
> +               intel_gt_scratch_offset(engine->gt,
> +                                       INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
> +
> +       if (mode & EMIT_FLUSH) {
> +               u32 *cs;
> +               u32 flags = 0;
> +
> +               flags |= PIPE_CONTROL_CS_STALL;
> +
> +               flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
> +               flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> +               flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> +               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
> +               flags |= PIPE_CONTROL_FLUSH_ENABLE;
> +               flags |= PIPE_CONTROL_QW_WRITE;
> +               flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

Ok.

> +
> +               cs = intel_ring_begin(request, 6);
> +               if (IS_ERR(cs))
> +                       return PTR_ERR(cs);
> +
> +               cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
> +

Personally, I wouldn't leave a newline here.
> +               intel_ring_advance(request, cs);
> +       }
> +
> +       if (mode & EMIT_INVALIDATE) {
> +               u32 *cs;
> +               u32 flags = 0;
> +
> +               flags |= PIPE_CONTROL_CS_STALL;
> +
> +               flags |= PIPE_CONTROL_TLB_INVALIDATE;
> +               flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
> +               flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
> +               flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
> +               flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
> +               flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
> +               flags |= PIPE_CONTROL_QW_WRITE;
> +               flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

Ok.

> +
> +               cs = intel_ring_begin(request, 6);
> +               if (IS_ERR(cs))
> +                       return PTR_ERR(cs);
> +
> +               cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
> +
> +               intel_ring_advance(request, cs);
> +       }
> +
> +       return 0;
> +}
> +
>  /*
>   * Reserve space for 2 NOOPs at the end of each request to be
>   * used as a workaround for not being allowed to do lite
> @@ -2829,7 +2887,10 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
>         logical_ring_default_irqs(engine);
>  
>         if (engine->class == RENDER_CLASS) {
> -               engine->emit_flush = gen8_emit_flush_render;
> +               if (INTEL_GEN(engine->i915) >= 11)
> +                       engine->emit_flush = gen11_emit_flush_render;
> +               else
> +                       engine->emit_flush = gen8_emit_flush_render;
>                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;

No fini breadcrumb flush?
-Chris
Mika Kuoppala Aug. 13, 2019, 7:57 a.m. UTC | #2
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-08-12 17:01:07)
>> Add tile cache flushing for gen11. To relive us from the
>> burden of previous obsolete workarounds, make a dedicated
>> flush/invalidate callback for gen11.
>> 
>> To fortify an independent single flush, do post
>> sync op as there are indications that without it
>> we don't flush everything. This should also make this
>> callback more readily usable in tgl (see l3 fabric flush).
>> 
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> ---
>>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  1 +
>>  drivers/gpu/drm/i915/gt/intel_lrc.c          | 63 +++++++++++++++++++-
>>  2 files changed, 63 insertions(+), 1 deletion(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> index 6a0879c27d14..929a17e54f2c 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> @@ -208,6 +208,7 @@
>>  #define   DISPLAY_PLANE_A           (0<<20)
>>  #define   DISPLAY_PLANE_B           (1<<20)
>>  #define GFX_OP_PIPE_CONTROL(len)       ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
>> +#define   PIPE_CONTROL_TILE_CACHE_FLUSH                        (1<<28) /* gen11+ */
> Ack.
>
>>  #define   PIPE_CONTROL_FLUSH_L3                                (1<<27)
>>  #define   PIPE_CONTROL_GLOBAL_GTT_IVB                  (1<<24) /* gen7+ */
>>  #define   PIPE_CONTROL_MMIO_WRITE                      (1<<23)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> index b97047d58d3d..891db3c933c9 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> @@ -2655,6 +2655,64 @@ static int gen8_emit_flush_render(struct i915_request *request,
>>         return 0;
>>  }
>>  
>> +static int gen11_emit_flush_render(struct i915_request *request,
>> +                                  u32 mode)
>> +{
>> +       struct intel_engine_cs *engine = request->engine;
>> +       const u32 scratch_addr =
>> +               intel_gt_scratch_offset(engine->gt,
>> +                                       INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
>> +
>> +       if (mode & EMIT_FLUSH) {
>> +               u32 *cs;
>> +               u32 flags = 0;
>> +
>> +               flags |= PIPE_CONTROL_CS_STALL;
>> +
>> +               flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
>> +               flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
>> +               flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
>> +               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
>> +               flags |= PIPE_CONTROL_FLUSH_ENABLE;
>> +               flags |= PIPE_CONTROL_QW_WRITE;
>> +               flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
>
> Ok.
>
>> +
>> +               cs = intel_ring_begin(request, 6);
>> +               if (IS_ERR(cs))
>> +                       return PTR_ERR(cs);
>> +
>> +               cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
>> +
>
> Personally, I wouldn't leave a newline here.

Will remove. Pipe control length could be also defined but meh.

>> +               intel_ring_advance(request, cs);
>> +       }
>> +
>> +       if (mode & EMIT_INVALIDATE) {
>> +               u32 *cs;
>> +               u32 flags = 0;
>> +
>> +               flags |= PIPE_CONTROL_CS_STALL;
>> +
>> +               flags |= PIPE_CONTROL_TLB_INVALIDATE;
>> +               flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
>> +               flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
>> +               flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
>> +               flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
>> +               flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
>> +               flags |= PIPE_CONTROL_QW_WRITE;
>> +               flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
>
> Ok.
>
>> +
>> +               cs = intel_ring_begin(request, 6);
>> +               if (IS_ERR(cs))
>> +                       return PTR_ERR(cs);
>> +
>> +               cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
>> +
>> +               intel_ring_advance(request, cs);
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>>  /*
>>   * Reserve space for 2 NOOPs at the end of each request to be
>>   * used as a workaround for not being allowed to do lite
>> @@ -2829,7 +2887,10 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
>>         logical_ring_default_irqs(engine);
>>  
>>         if (engine->class == RENDER_CLASS) {
>> -               engine->emit_flush = gen8_emit_flush_render;
>> +               if (INTEL_GEN(engine->i915) >= 11)
>> +                       engine->emit_flush = gen11_emit_flush_render;
>> +               else
>> +                       engine->emit_flush = gen8_emit_flush_render;
>>                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
>
> No fini breadcrumb flush?

Well there are room for cleaning there for sure. I admit to being
a bit coward. I don't have an icelake and changing that would
warrant quite amount of gem_concurrent_blt cycles.

If you can convince me that ci/bat will catch my mistakes,
I could reconsider.

-Mika
Chris Wilson Aug. 13, 2019, 8:01 a.m. UTC | #3
Quoting Mika Kuoppala (2019-08-13 08:57:51)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Quoting Mika Kuoppala (2019-08-12 17:01:07)
> >> @@ -2829,7 +2887,10 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
> >>         logical_ring_default_irqs(engine);
> >>  
> >>         if (engine->class == RENDER_CLASS) {
> >> -               engine->emit_flush = gen8_emit_flush_render;
> >> +               if (INTEL_GEN(engine->i915) >= 11)
> >> +                       engine->emit_flush = gen11_emit_flush_render;
> >> +               else
> >> +                       engine->emit_flush = gen8_emit_flush_render;
> >>                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
> >
> > No fini breadcrumb flush?
> 
> Well there are room for cleaning there for sure. I admit to being
> a bit coward. I don't have an icelake and changing that would
> warrant quite amount of gem_concurrent_blt cycles.
> 
> If you can convince me that ci/bat will catch my mistakes,
> I could reconsider.

It's the vital one. Changing emit_flush will have no impact by itself...
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 6a0879c27d14..929a17e54f2c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -208,6 +208,7 @@ 
 #define   DISPLAY_PLANE_A           (0<<20)
 #define   DISPLAY_PLANE_B           (1<<20)
 #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+#define   PIPE_CONTROL_TILE_CACHE_FLUSH			(1<<28) /* gen11+ */
 #define   PIPE_CONTROL_FLUSH_L3				(1<<27)
 #define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24) /* gen7+ */
 #define   PIPE_CONTROL_MMIO_WRITE			(1<<23)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b97047d58d3d..891db3c933c9 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2655,6 +2655,64 @@  static int gen8_emit_flush_render(struct i915_request *request,
 	return 0;
 }
 
+static int gen11_emit_flush_render(struct i915_request *request,
+				   u32 mode)
+{
+	struct intel_engine_cs *engine = request->engine;
+	const u32 scratch_addr =
+		intel_gt_scratch_offset(engine->gt,
+					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
+
+	if (mode & EMIT_FLUSH) {
+		u32 *cs;
+		u32 flags = 0;
+
+		flags |= PIPE_CONTROL_CS_STALL;
+
+		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+		cs = intel_ring_begin(request, 6);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+
+		intel_ring_advance(request, cs);
+	}
+
+	if (mode & EMIT_INVALIDATE) {
+		u32 *cs;
+		u32 flags = 0;
+
+		flags |= PIPE_CONTROL_CS_STALL;
+
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+		cs = intel_ring_begin(request, 6);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+
+		intel_ring_advance(request, cs);
+	}
+
+	return 0;
+}
+
 /*
  * Reserve space for 2 NOOPs at the end of each request to be
  * used as a workaround for not being allowed to do lite
@@ -2829,7 +2887,10 @@  int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 	logical_ring_default_irqs(engine);
 
 	if (engine->class == RENDER_CLASS) {
-		engine->emit_flush = gen8_emit_flush_render;
+		if (INTEL_GEN(engine->i915) >= 11)
+			engine->emit_flush = gen11_emit_flush_render;
+		else
+			engine->emit_flush = gen8_emit_flush_render;
 		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
 	}