diff mbox series

[v2,3/3] drm/i915/perf: enable OAR context save/restore of performance counters

Message ID 20191017061106.22640-3-umesh.nerlige.ramappa@intel.com (mailing list archive)
State New, archived
Headers show
Series [v2,1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers | expand

Commit Message

Umesh Nerlige Ramappa Oct. 17, 2019, 6:11 a.m. UTC
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

We want this so we can preempt performance queries and keep the system
responsive even when long-running queries are ongoing. We enable it only
for the context that requested it, rather than for all contexts.

v2: use LRI to modify context control (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 22 ++++++---
 drivers/gpu/drm/i915/gt/intel_lrc.h |  3 ++
 drivers/gpu/drm/i915/i915_perf.c    | 76 +++++++++++++++++++++++++++--
 3 files changed, 90 insertions(+), 11 deletions(-)

Comments

Chris Wilson Oct. 17, 2019, 6:30 a.m. UTC | #1
Quoting Umesh Nerlige Ramappa (2019-10-17 07:11:06)
> +static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
> +{
> +       struct i915_request *rq;
> +       u32 *cs;
> +       int err = 0;
> +
> +       rq = i915_request_create(ce);
> +       if (IS_ERR(rq))
> +               return PTR_ERR(rq);
> +
> +       cs = intel_ring_begin(rq, 4);
> +       if (IS_ERR(cs)) {
> +               err = PTR_ERR(cs);
> +               goto out;
> +       }
> +
> +       *cs++ = MI_LOAD_REGISTER_IMM(1);
> +       *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
> +       *cs++ = intel_lrc_make_ctx_control(ce->engine) |

It's a masked update. It only changes the bit in the register identified
by the mask.

*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
		      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);


> +               (enable ?
> +                _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE) :
> +                _MASKED_BIT_DISABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE));
> +       *cs++ = MI_NOOP;
> +
> +       intel_ring_advance(rq, cs);
> +
> +out:
> +       i915_request_add(rq);
> +
> +       return err;
> +}

>  /*
>   * Manages updating the per-context aspects of the OA stream
>   * configuration across all contexts.
> @@ -2316,6 +2375,17 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>                         return err;
>                 }
>  
> +               /*
> +                * For Gen12, performance counters are context
> +                * saved/restored. Only enable it for the context that
> +                * requested this.
> +                */
> +               if (ctx == stream->ctx && IS_GEN(i915, 12)) {
> +                       err = gen12_configure_context_oar(ctx, oa_config != NULL);

You have the intel_context pinned already as stream->pinned_ctx.
-Chris
---------------------------------------------------------------------
Intel Corporation (UK) Limited
Registered No. 1134945 (England)
Registered Office: Pipers Way, Swindon SN3 1RJ
VAT No: 860 2173 47

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
Umesh Nerlige Ramappa Oct. 17, 2019, 7:24 a.m. UTC | #2
On Thu, Oct 17, 2019 at 07:30:18AM +0100, Chris Wilson wrote:
>Quoting Umesh Nerlige Ramappa (2019-10-17 07:11:06)
>> +static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
>> +{
>> +       struct i915_request *rq;
>> +       u32 *cs;
>> +       int err = 0;
>> +
>> +       rq = i915_request_create(ce);
>> +       if (IS_ERR(rq))
>> +               return PTR_ERR(rq);
>> +
>> +       cs = intel_ring_begin(rq, 4);
>> +       if (IS_ERR(cs)) {
>> +               err = PTR_ERR(cs);
>> +               goto out;
>> +       }
>> +
>> +       *cs++ = MI_LOAD_REGISTER_IMM(1);
>> +       *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
>> +       *cs++ = intel_lrc_make_ctx_control(ce->engine) |
>
>It's a masked update. It only changes the bit in the register identified
>by the mask.
>
>*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
>		      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
>

Got it. In that case, the changes related to intel_lrc_make_ctx_control are
not needed.
>
>> +               (enable ?
>> +                _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE) :
>> +                _MASKED_BIT_DISABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE));
>> +       *cs++ = MI_NOOP;
>> +
>> +       intel_ring_advance(rq, cs);
>> +
>> +out:
>> +       i915_request_add(rq);
>> +
>> +       return err;
>> +}
>
>>  /*
>>   * Manages updating the per-context aspects of the OA stream
>>   * configuration across all contexts.
>> @@ -2316,6 +2375,17 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>>                         return err;
>>                 }
>>
>> +               /*
>> +                * For Gen12, performance counters are context
>> +                * saved/restored. Only enable it for the context that
>> +                * requested this.
>> +                */
>> +               if (ctx == stream->ctx && IS_GEN(i915, 12)) {
>> +                       err = gen12_configure_context_oar(ctx, oa_config != NULL);
>
>You have the intel_context pinned already as stream->pinned_ctx.

I see, that's simpler. I will call the emit function directly on the pinned
context.

Thanks,
Umesh

>-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e9fe9f79cedd..d45c020fc13d 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1222,6 +1222,19 @@  static bool can_merge_rq(const struct i915_request *prev,
 	return true;
 }
 
+/*
+ * Build the masked-write value used for CTX_CONTEXT_CONTROL.
+ *
+ * The value always clears CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT and sets
+ * CTX_CTRL_INHIBIT_SYN_CTX_SWITCH. Before gen11 it additionally clears
+ * CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT and CTX_CTRL_RS_CTX_ENABLE.
+ */
+u32 intel_lrc_make_ctx_control(const struct intel_engine_cs *engine)
+{
+	u32 ctl = _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+
+	ctl |= _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+
+	/* gen11+ needs nothing beyond the common bits */
+	if (INTEL_GEN(engine->i915) >= 11)
+		return ctl;
+
+	return ctl | _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
+					 CTX_CTRL_RS_CTX_ENABLE);
+}
+
 static void virtual_update_register_offsets(u32 *regs,
 					    struct intel_engine_cs *engine)
 {
@@ -3667,14 +3680,7 @@  static void init_common_reg_state(u32 * const regs,
 				  const struct intel_engine_cs *engine,
 				  const struct intel_ring *ring)
 {
-	regs[CTX_CONTEXT_CONTROL] =
-		_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
-		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
-	if (INTEL_GEN(engine->i915) < 11)
-		regs[CTX_CONTEXT_CONTROL] |=
-			_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
-					    CTX_CTRL_RS_CTX_ENABLE);
-
+	regs[CTX_CONTEXT_CONTROL] = intel_lrc_make_ctx_control(engine);
 	regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID;
 	regs[CTX_BB_STATE] = RING_BB_PPGTT;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 99dc576a4e25..6b2b196f09e7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -43,6 +43,7 @@  struct intel_engine_cs;
 #define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	(1 << 0)
 #define   CTX_CTRL_RS_CTX_ENABLE		(1 << 1)
 #define	  CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT	(1 << 2)
+#define	  GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE	(1 << 8)
 #define RING_CONTEXT_STATUS_PTR(base)		_MMIO((base) + 0x3a0)
 #define RING_EXECLIST_SQ_CONTENTS(base)		_MMIO((base) + 0x510)
 #define RING_EXECLIST_CONTROL(base)		_MMIO((base) + 0x550)
@@ -145,4 +146,6 @@  struct intel_engine_cs *
 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
 				 unsigned int sibling);
 
+u32 intel_lrc_make_ctx_control(const struct intel_engine_cs *engine);
+
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index abc2b7a6dc92..04ebe3207de8 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2081,9 +2081,6 @@  gen8_update_reg_state_unlocked(const struct intel_context *ce,
 	for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
 		reg_state[ctx_flexeu0 + i * 2 + 1] =
 			oa_config_flex_reg(stream->oa_config, flex_regs[i]);
-
-	reg_state[CTX_R_PWR_CLK_STATE] =
-		intel_sseu_make_rpcs(ce->engine->i915, &ce->sseu);
 }
 
 struct flex {
@@ -2211,6 +2208,68 @@  static int gen8_configure_context(struct i915_gem_context *ctx,
 	return err;
 }
 
+/*
+ * gen12_emit_oar_config - toggle the OAR context enable bit for a context
+ * @ce: context on which the request is created and whose engine's
+ *      RING_CONTEXT_CONTROL register is written
+ * @enable: true to set GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, false to clear it
+ *
+ * Emits one MI_LOAD_REGISTER_IMM to RING_CONTEXT_CONTROL. The write is a
+ * masked update, so only the OAR enable bit is named in the mask and every
+ * other bit of the register is left as it is.
+ *
+ * Returns 0 on success, or a negative error code if the request could not
+ * be created or ring space could not be reserved.
+ */
+static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
+{
+	struct i915_request *rq;
+	u32 *cs;
+	int err = 0;
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto out;
+	}
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
+	/* Mask selects only the OAR bit; the value sets or clears it */
+	*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
+			      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
+	*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+
+out:
+	/* The request is added on the error path too */
+	i915_request_add(rq);
+
+	return err;
+}
+
+/*
+ * Apply the OAR enable/disable setting to the render-class engine
+ * contexts of @ctx.
+ *
+ * Iterates the engines of @ctx with the engines lock held. For every
+ * render-class context that is currently pinned, gen12_emit_oar_config()
+ * is called with @enable. Stops at the first failure.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int gen12_configure_context_oar(struct i915_gem_context *ctx,
+				       bool enable)
+{
+	struct i915_gem_engines_iter iter;
+	struct intel_context *ce;
+	int ret = 0;
+
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), iter) {
+		GEM_BUG_ON(ce == ce->engine->kernel_context);
+
+		/* Non-render engines are skipped */
+		if (ce->engine->class == RENDER_CLASS) {
+			ret = intel_context_lock_pinned(ce);
+			if (ret)
+				break;
+
+			/*
+			 * Unpinned contexts are not touched here; per the
+			 * original note, OA settings will be set upon first
+			 * use.
+			 */
+			if (intel_context_is_pinned(ce))
+				ret = gen12_emit_oar_config(ce, enable);
+
+			intel_context_unlock_pinned(ce);
+			if (ret)
+				break;
+		}
+	}
+	i915_gem_context_unlock_engines(ctx);
+
+	return ret;
+}
+
 /*
  * Manages updating the per-context aspects of the OA stream
  * configuration across all contexts.
@@ -2316,6 +2375,17 @@  static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
 			return err;
 		}
 
+		/*
+		 * For Gen12, performance counters are context
+		 * saved/restored. Only enable it for the context that
+		 * requested this.
+		 */
+		if (ctx == stream->ctx && IS_GEN(i915, 12)) {
+			err = gen12_configure_context_oar(ctx, oa_config != NULL);
+			if (err)
+				return err;
+		}
+
 		spin_lock(&i915->gem.contexts.lock);
 		list_safe_reset_next(ctx, cn, link);
 		i915_gem_context_put(ctx);