Message ID | 20191114192114.61547-2-umesh.nerlige.ramappa@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/2] drm/i915/perf: Allow non-privileged access when OA buffer is not sampled | expand |
On Thu, Nov 14, 2019 at 11:21:14AM -0800, Umesh Nerlige Ramappa wrote: >Gen12 supports saving/restoring render counters per context. Apply OAR >configuration only for the context that is passed in to perf. > >v2: >- Fix OACTXCONTROL value to only stop/resume counters. >- Remove gen12_update_reg_state_unlocked as power state is already > applied by the caller. > >Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> IGT Tests in this series: https://patchwork.freedesktop.org/series/69322/ gen12-mi-rpc gen12-unprivileged-single-ctx-counters Thanks, Umesh >--- > drivers/gpu/drm/i915/i915_perf.c | 193 +++++++++++++++++-------------- > 1 file changed, 108 insertions(+), 85 deletions(-) > >diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >index 221c1090ae93..2f0be5fbef4b 100644 >--- a/drivers/gpu/drm/i915/i915_perf.c >+++ b/drivers/gpu/drm/i915/i915_perf.c >@@ -2078,20 +2078,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce, > u32 *reg_state = ce->lrc_reg_state; > int i; > >- if (IS_GEN(stream->perf->i915, 12)) { >- u32 format = stream->oa_buffer.format; >+ reg_state[ctx_oactxctrl + 1] = >+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | >+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | >+ GEN8_OA_COUNTER_RESUME; > >- reg_state[ctx_oactxctrl + 1] = >- (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | >- (stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); >- } else { >- reg_state[ctx_oactxctrl + 1] = >- (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | >- (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | >- GEN8_OA_COUNTER_RESUME; >- } >- >- for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++) >+ for (i = 0; i < ARRAY_SIZE(flex_regs); i++) > reg_state[ctx_flexeu0 + i * 2 + 1] = > oa_config_flex_reg(stream->oa_config, flex_regs[i]); > >@@ -2224,34 +2216,49 @@ static int gen8_configure_context(struct i915_gem_context *ctx, > return err; > } > >-static int gen12_emit_oar_config(struct intel_context *ce, bool enable) >+static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable) > { >- struct i915_request *rq; >- u32 *cs; >- int err = 0; >- >- rq = i915_request_create(ce); >- if (IS_ERR(rq)) >- return PTR_ERR(rq); >+ int err; >+ struct intel_context *ce = stream->pinned_ctx; >+ struct flex regs_context[] = { >+ { >+ GEN8_OACTXCONTROL, >+ stream->perf->ctx_oactxctrl_offset + 1, >+ enable ? GEN8_OA_COUNTER_RESUME : 0, >+ }, >+ }; >+ struct flex regs_lri[] = { >+ { >+ GEN12_OAR_OACONTROL, >+ }, >+ { >+ RING_CONTEXT_CONTROL(ce->engine->mmio_base), >+ }, >+ }; >+ u32 format = stream->oa_buffer.format; > >- cs = intel_ring_begin(rq, 4); >- if (IS_ERR(cs)) { >- err = PTR_ERR(cs); >- goto out; >- } >+ regs_lri[0].value = >+ (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | >+ (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); > >- *cs++ = MI_LOAD_REGISTER_IMM(1); >- *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); >- *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, >- enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); >- *cs++ = MI_NOOP; >+ regs_lri[1].value = >+ _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, >+ enable ? >+ GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : >+ 0); > >- intel_ring_advance(rq, cs); >+ /* Modify the context image of pinned context with regs_context*/ >+ err = intel_context_lock_pinned(ce); >+ if (err) >+ return err; > >-out: >- i915_request_add(rq); >+ err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context)); >+ intel_context_unlock_pinned(ce); >+ if (err) >+ return err; > >- return err; >+ /* Apply regs_lri using LRI with pinned context */ >+ return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri)); > } > > /* >@@ -2277,53 +2284,16 @@ static int gen12_emit_oar_config(struct intel_context *ce, bool enable) > * per-context OA state. > * > * Note: it's only the RCS/Render context that has any OA state. >+ * Note: the first flex register passed must always be R_PWR_CLK_STATE > */ >-static int lrc_configure_all_contexts(struct i915_perf_stream *stream, >- const struct i915_oa_config *oa_config) >+static int oa_configure_all_contexts(struct i915_perf_stream *stream, >+ struct flex *regs, >+ size_t num_regs) > { > struct drm_i915_private *i915 = stream->perf->i915; >- /* The MMIO offsets for Flex EU registers aren't contiguous */ >- const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; >-#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) >- struct flex regs[] = { >- { >- GEN8_R_PWR_CLK_STATE, >- CTX_R_PWR_CLK_STATE, >- }, >- { >- IS_GEN(i915, 12) ? >- GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL, >- stream->perf->ctx_oactxctrl_offset + 1, >- }, >- { EU_PERF_CNTL0, ctx_flexeuN(0) }, >- { EU_PERF_CNTL1, ctx_flexeuN(1) }, >- { EU_PERF_CNTL2, ctx_flexeuN(2) }, >- { EU_PERF_CNTL3, ctx_flexeuN(3) }, >- { EU_PERF_CNTL4, ctx_flexeuN(4) }, >- { EU_PERF_CNTL5, ctx_flexeuN(5) }, >- { EU_PERF_CNTL6, ctx_flexeuN(6) }, >- }; >-#undef ctx_flexeuN > struct intel_engine_cs *engine; > struct i915_gem_context *ctx, *cn; >- size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs); >- int i, err; >- >- if (IS_GEN(i915, 12)) { >- u32 format = stream->oa_buffer.format; >- >- regs[1].value = >- (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | >- (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); >- } else { >- regs[1].value = >- (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | >- (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | >- GEN8_OA_COUNTER_RESUME; >- } >- >- for (i = 2; !!ctx_flexeu0 && i < array_size; i++) >- regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); >+ int err; > > lockdep_assert_held(&stream->perf->lock); > >@@ -2353,7 +2323,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, > > spin_unlock(&i915->gem.contexts.lock); > >- err = gen8_configure_context(ctx, regs, array_size); >+ err = gen8_configure_context(ctx, regs, num_regs); > if (err) { > i915_gem_context_put(ctx); > return err; >@@ -2378,7 +2348,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, > > regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); > >- err = gen8_modify_self(ce, regs, array_size); >+ err = gen8_modify_self(ce, regs, num_regs); > if (err) > return err; > } >@@ -2386,6 +2356,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, > return 0; > } > >+static int gen12_configure_all_contexts(struct i915_perf_stream *stream, >+ const struct i915_oa_config *oa_config) >+{ >+ struct flex regs[] = { >+ { >+ GEN8_R_PWR_CLK_STATE, >+ CTX_R_PWR_CLK_STATE, >+ }, >+ }; >+ >+ return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); >+} >+ >+static int lrc_configure_all_contexts(struct i915_perf_stream *stream, >+ const struct i915_oa_config *oa_config) >+{ >+ /* The MMIO offsets for Flex EU registers aren't contiguous */ >+ const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; >+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) >+ struct flex regs[] = { >+ { >+ GEN8_R_PWR_CLK_STATE, >+ CTX_R_PWR_CLK_STATE, >+ }, >+ { >+ GEN8_OACTXCONTROL, >+ stream->perf->ctx_oactxctrl_offset + 1, >+ }, >+ { EU_PERF_CNTL0, ctx_flexeuN(0) }, >+ { EU_PERF_CNTL1, ctx_flexeuN(1) }, >+ { EU_PERF_CNTL2, ctx_flexeuN(2) }, >+ { EU_PERF_CNTL3, ctx_flexeuN(3) }, >+ { EU_PERF_CNTL4, ctx_flexeuN(4) }, >+ { EU_PERF_CNTL5, ctx_flexeuN(5) }, >+ { EU_PERF_CNTL6, ctx_flexeuN(6) }, >+ }; >+#undef ctx_flexeuN >+ int i; >+ >+ regs[1].value = >+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | >+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | >+ GEN8_OA_COUNTER_RESUME; >+ >+ for (i = 2; i < ARRAY_SIZE(regs); i++) >+ regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); >+ >+ return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); >+} >+ > static int gen8_enable_metric_set(struct i915_perf_stream *stream) > { > struct intel_uncore *uncore = stream->uncore; >@@ -2469,7 +2489,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) > * to make sure all slices/subslices are ON before writing to NOA > * registers. > */ >- ret = lrc_configure_all_contexts(stream, oa_config); >+ ret = gen12_configure_all_contexts(stream, oa_config); > if (ret) > return ret; > >@@ -2479,8 +2499,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) > * requested this. > */ > if (stream->ctx) { >- ret = gen12_emit_oar_config(stream->pinned_ctx, >- oa_config != NULL); >+ ret = gen12_configure_oar_context(stream, oa_config != NULL); > if (ret) > return ret; > } >@@ -2514,11 +2533,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) > struct intel_uncore *uncore = stream->uncore; > > /* Reset all contexts' slices/subslices configurations. */ >- lrc_configure_all_contexts(stream, NULL); >+ gen12_configure_all_contexts(stream, NULL); > > /* disable the context save/restore or OAR counters */ > if (stream->ctx) >- gen12_emit_oar_config(stream->pinned_ctx, false); >+ gen12_configure_oar_context(stream, false); > > /* Make sure we disable noa to save power. */ > intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); >@@ -2860,7 +2879,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce, > return; > > stream = engine->i915->perf.exclusive_stream; >- if (stream) >+ /* >+ * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller >+ * is already doing that, so nothing to be done for gen12 here. >+ */ >+ if (stream && INTEL_GEN(stream->perf->i915) < 12) > gen8_update_reg_state_unlocked(ce, stream); > } > >-- >2.20.1 > >_______________________________________________ >Intel-gfx mailing list >Intel-gfx@lists.freedesktop.org >https://lists.freedesktop.org/mailman/listinfo/intel-gfx
On 14/11/2019 21:21, Umesh Nerlige Ramappa wrote: > Gen12 supports saving/restoring render counters per context. Apply OAR > configuration only for the context that is passed in to perf. > > v2: > - Fix OACTXCONTROL value to only stop/resume counters. > - Remove gen12_update_reg_state_unlocked as power state is already > applied by the caller. > > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> > --- > drivers/gpu/drm/i915/i915_perf.c | 193 +++++++++++++++++-------------- > 1 file changed, 108 insertions(+), 85 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 221c1090ae93..2f0be5fbef4b 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -2078,20 +2078,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce, > u32 *reg_state = ce->lrc_reg_state; > int i; > > - if (IS_GEN(stream->perf->i915, 12)) { > - u32 format = stream->oa_buffer.format; > + reg_state[ctx_oactxctrl + 1] = > + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | > + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | > + GEN8_OA_COUNTER_RESUME; > > - reg_state[ctx_oactxctrl + 1] = > - (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | > - (stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); > - } else { > - reg_state[ctx_oactxctrl + 1] = > - (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | > - (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | > - GEN8_OA_COUNTER_RESUME; > - } > - > - for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++) > + for (i = 0; i < ARRAY_SIZE(flex_regs); i++) > reg_state[ctx_flexeu0 + i * 2 + 1] = > oa_config_flex_reg(stream->oa_config, flex_regs[i]); > > @@ -2224,34 +2216,49 @@ static int gen8_configure_context(struct i915_gem_context *ctx, > return err; > } > > -static int gen12_emit_oar_config(struct intel_context *ce, bool enable) > +static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable) > { > - struct i915_request *rq; > - u32 *cs; > - int err = 0; > - > - rq = i915_request_create(ce); > - if (IS_ERR(rq)) > - return PTR_ERR(rq); > + int err; > + struct intel_context *ce = stream->pinned_ctx; > + struct flex regs_context[] = { > + { > + GEN8_OACTXCONTROL, > + stream->perf->ctx_oactxctrl_offset + 1, > + enable ? GEN8_OA_COUNTER_RESUME : 0, > + }, > + }; When do we configure the Flex registers? > + struct flex regs_lri[] = { > + { > + GEN12_OAR_OACONTROL, > + }, > + { > + RING_CONTEXT_CONTROL(ce->engine->mmio_base), > + }, > + }; > + u32 format = stream->oa_buffer.format; > > - cs = intel_ring_begin(rq, 4); > - if (IS_ERR(cs)) { > - err = PTR_ERR(cs); > - goto out; > - } > + regs_lri[0].value = > + (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | > + (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); > > - *cs++ = MI_LOAD_REGISTER_IMM(1); > - *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); > - *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, > - enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); > - *cs++ = MI_NOOP; > + regs_lri[1].value = > + _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, > + enable ? > + GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : > + 0); Can't we put those values in the array above? > > - intel_ring_advance(rq, cs); > + /* Modify the context image of pinned context with regs_context*/ > + err = intel_context_lock_pinned(ce); > + if (err) > + return err; > > -out: > - i915_request_add(rq); > + err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context)); > + intel_context_unlock_pinned(ce); > + if (err) > + return err; > > - return err; > + /* Apply regs_lri using LRI with pinned context */ > + return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri)); > } > > /* > @@ -2277,53 +2284,16 @@ static int gen12_emit_oar_config(struct intel_context *ce, bool enable) > * per-context OA state. > * > * Note: it's only the RCS/Render context that has any OA state. > + * Note: the first flex register passed must always be R_PWR_CLK_STATE > */ > -static int lrc_configure_all_contexts(struct i915_perf_stream *stream, > - const struct i915_oa_config *oa_config) > +static int oa_configure_all_contexts(struct i915_perf_stream *stream, > + struct flex *regs, > + size_t num_regs) > { > struct drm_i915_private *i915 = stream->perf->i915; > - /* The MMIO offsets for Flex EU registers aren't contiguous */ > - const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; > -#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) > - struct flex regs[] = { > - { > - GEN8_R_PWR_CLK_STATE, > - CTX_R_PWR_CLK_STATE, > - }, > - { > - IS_GEN(i915, 12) ? > - GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL, > - stream->perf->ctx_oactxctrl_offset + 1, > - }, > - { EU_PERF_CNTL0, ctx_flexeuN(0) }, > - { EU_PERF_CNTL1, ctx_flexeuN(1) }, > - { EU_PERF_CNTL2, ctx_flexeuN(2) }, > - { EU_PERF_CNTL3, ctx_flexeuN(3) }, > - { EU_PERF_CNTL4, ctx_flexeuN(4) }, > - { EU_PERF_CNTL5, ctx_flexeuN(5) }, > - { EU_PERF_CNTL6, ctx_flexeuN(6) }, > - }; > -#undef ctx_flexeuN > struct intel_engine_cs *engine; > struct i915_gem_context *ctx, *cn; > - size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs); > - int i, err; > - > - if (IS_GEN(i915, 12)) { > - u32 format = stream->oa_buffer.format; > - > - regs[1].value = > - (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | > - (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); > - } else { > - regs[1].value = > - (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | > - (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | > - GEN8_OA_COUNTER_RESUME; > - } > - > - for (i = 2; !!ctx_flexeu0 && i < array_size; i++) > - regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); > + int err; > > lockdep_assert_held(&stream->perf->lock); > > @@ -2353,7 +2323,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, > > spin_unlock(&i915->gem.contexts.lock); > > - err = gen8_configure_context(ctx, regs, array_size); > + err = gen8_configure_context(ctx, regs, num_regs); > if (err) { > i915_gem_context_put(ctx); > return err; > @@ -2378,7 +2348,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, > > regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); > > - err = gen8_modify_self(ce, regs, array_size); > + err = gen8_modify_self(ce, regs, num_regs); > if (err) > return err; > } > @@ -2386,6 +2356,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, > return 0; > } > > +static int gen12_configure_all_contexts(struct i915_perf_stream *stream, > + const struct i915_oa_config *oa_config) > +{ > + struct flex regs[] = { > + { > + GEN8_R_PWR_CLK_STATE, > + CTX_R_PWR_CLK_STATE, > + }, > + }; > + > + return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); > +} > + > +static int lrc_configure_all_contexts(struct i915_perf_stream *stream, > + const struct i915_oa_config *oa_config) > +{ > + /* The MMIO offsets for Flex EU registers aren't contiguous */ > + const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; > +#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) > + struct flex regs[] = { > + { > + GEN8_R_PWR_CLK_STATE, > + CTX_R_PWR_CLK_STATE, > + }, > + { > + GEN8_OACTXCONTROL, > + stream->perf->ctx_oactxctrl_offset + 1, > + }, > + { EU_PERF_CNTL0, ctx_flexeuN(0) }, > + { EU_PERF_CNTL1, ctx_flexeuN(1) }, > + { EU_PERF_CNTL2, ctx_flexeuN(2) }, > + { EU_PERF_CNTL3, ctx_flexeuN(3) }, > + { EU_PERF_CNTL4, ctx_flexeuN(4) }, > + { EU_PERF_CNTL5, ctx_flexeuN(5) }, > + { EU_PERF_CNTL6, ctx_flexeuN(6) }, > + }; > +#undef ctx_flexeuN > + int i; > + > + regs[1].value = > + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | > + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | > + GEN8_OA_COUNTER_RESUME; > + > + for (i = 2; i < ARRAY_SIZE(regs); i++) > + regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); > + > + return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); > +} > + > static int gen8_enable_metric_set(struct i915_perf_stream *stream) > { > struct intel_uncore *uncore = stream->uncore; > @@ -2469,7 +2489,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) > * to make sure all slices/subslices are ON before writing to NOA > * registers. > */ > - ret = lrc_configure_all_contexts(stream, oa_config); > + ret = gen12_configure_all_contexts(stream, oa_config); > if (ret) > return ret; > > @@ -2479,8 +2499,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) > * requested this. > */ > if (stream->ctx) { > - ret = gen12_emit_oar_config(stream->pinned_ctx, > - oa_config != NULL); > + ret = gen12_configure_oar_context(stream, oa_config != NULL); I think you can assume oa_config is going to be != NULL in the enable_metric_set vfunc. > if (ret) > return ret; > } > @@ -2514,11 +2533,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) > struct intel_uncore *uncore = stream->uncore; > > /* Reset all contexts' slices/subslices configurations. */ > - lrc_configure_all_contexts(stream, NULL); > + gen12_configure_all_contexts(stream, NULL); > > /* disable the context save/restore or OAR counters */ > if (stream->ctx) > - gen12_emit_oar_config(stream->pinned_ctx, false); > + gen12_configure_oar_context(stream, false); > > /* Make sure we disable noa to save power. */ > intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); > @@ -2860,7 +2879,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce, > return; > > stream = engine->i915->perf.exclusive_stream; > - if (stream) > + /* > + * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller > + * is already doing that, so nothing to be done for gen12 here. > + */ > + if (stream && INTEL_GEN(stream->perf->i915) < 12) > gen8_update_reg_state_unlocked(ce, stream); > } >
On Mon, Nov 18, 2019 at 03:42:28PM +0200, Lionel Landwerlin wrote: >On 14/11/2019 21:21, Umesh Nerlige Ramappa wrote: >>Gen12 supports saving/restoring render counters per context. Apply OAR >>configuration only for the context that is passed in to perf. >> >>v2: >>- Fix OACTXCONTROL value to only stop/resume counters. >>- Remove gen12_update_reg_state_unlocked as power state is already >> applied by the caller. >> >>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> >>--- >> drivers/gpu/drm/i915/i915_perf.c | 193 +++++++++++++++++-------------- >> 1 file changed, 108 insertions(+), 85 deletions(-) >> >>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >>index 221c1090ae93..2f0be5fbef4b 100644 >>--- a/drivers/gpu/drm/i915/i915_perf.c >>+++ b/drivers/gpu/drm/i915/i915_perf.c >>@@ -2078,20 +2078,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce, >> u32 *reg_state = ce->lrc_reg_state; >> int i; >>- if (IS_GEN(stream->perf->i915, 12)) { >>- u32 format = stream->oa_buffer.format; >>+ reg_state[ctx_oactxctrl + 1] = >>+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | >>+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | >>+ GEN8_OA_COUNTER_RESUME; >>- reg_state[ctx_oactxctrl + 1] = >>- (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | >>- (stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); >>- } else { >>- reg_state[ctx_oactxctrl + 1] = >>- (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | >>- (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | >>- GEN8_OA_COUNTER_RESUME; >>- } >>- >>- for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++) >>+ for (i = 0; i < ARRAY_SIZE(flex_regs); i++) >> reg_state[ctx_flexeu0 + i * 2 + 1] = >> oa_config_flex_reg(stream->oa_config, flex_regs[i]); >>@@ -2224,34 +2216,49 @@ static int gen8_configure_context(struct i915_gem_context *ctx, >> return err; >> } >>-static int gen12_emit_oar_config(struct intel_context *ce, bool enable) >>+static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable) >> { >>- struct i915_request *rq; >>- u32 *cs; >>- int err = 0; >>- >>- rq = i915_request_create(ce); >>- if (IS_ERR(rq)) >>- return PTR_ERR(rq); >>+ int err; >>+ struct intel_context *ce = stream->pinned_ctx; >>+ struct flex regs_context[] = { >>+ { >>+ GEN8_OACTXCONTROL, >>+ stream->perf->ctx_oactxctrl_offset + 1, >>+ enable ? GEN8_OA_COUNTER_RESUME : 0, >>+ }, >>+ }; > > >When do we configure the Flex registers? I don't see flex registers in the context image dump or in the spec, so I guess we would only configure them if they are part of the metric set. Will post a new version with below comments. Thanks, Umesh > > >>+ struct flex regs_lri[] = { >>+ { >>+ GEN12_OAR_OACONTROL, >>+ }, >>+ { >>+ RING_CONTEXT_CONTROL(ce->engine->mmio_base), >>+ }, >>+ }; >>+ u32 format = stream->oa_buffer.format; >>- cs = intel_ring_begin(rq, 4); >>- if (IS_ERR(cs)) { >>- err = PTR_ERR(cs); >>- goto out; >>- } >>+ regs_lri[0].value = >>+ (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | >>+ (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); >>- *cs++ = MI_LOAD_REGISTER_IMM(1); >>- *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); >>- *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, >>- enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); >>- *cs++ = MI_NOOP; >>+ regs_lri[1].value = >>+ _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, >>+ enable ? >>+ GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : >>+ 0); > > >Can't we put those values in the array above? > > >>- intel_ring_advance(rq, cs); >>+ /* Modify the context image of pinned context with regs_context*/ >>+ err = intel_context_lock_pinned(ce); >>+ if (err) >>+ return err; >>-out: >>- i915_request_add(rq); >>+ err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context)); >>+ intel_context_unlock_pinned(ce); >>+ if (err) >>+ return err; >>- return err; >>+ /* Apply regs_lri using LRI with pinned context */ >>+ return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri)); >> } >> /* >>@@ -2277,53 +2284,16 @@ static int gen12_emit_oar_config(struct intel_context *ce, bool enable) >> * per-context OA state. >> * >> * Note: it's only the RCS/Render context that has any OA state. >>+ * Note: the first flex register passed must always be R_PWR_CLK_STATE >> */ >>-static int lrc_configure_all_contexts(struct i915_perf_stream *stream, >>- const struct i915_oa_config *oa_config) >>+static int oa_configure_all_contexts(struct i915_perf_stream *stream, >>+ struct flex *regs, >>+ size_t num_regs) >> { >> struct drm_i915_private *i915 = stream->perf->i915; >>- /* The MMIO offsets for Flex EU registers aren't contiguous */ >>- const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; >>-#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) >>- struct flex regs[] = { >>- { >>- GEN8_R_PWR_CLK_STATE, >>- CTX_R_PWR_CLK_STATE, >>- }, >>- { >>- IS_GEN(i915, 12) ? >>- GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL, >>- stream->perf->ctx_oactxctrl_offset + 1, >>- }, >>- { EU_PERF_CNTL0, ctx_flexeuN(0) }, >>- { EU_PERF_CNTL1, ctx_flexeuN(1) }, >>- { EU_PERF_CNTL2, ctx_flexeuN(2) }, >>- { EU_PERF_CNTL3, ctx_flexeuN(3) }, >>- { EU_PERF_CNTL4, ctx_flexeuN(4) }, >>- { EU_PERF_CNTL5, ctx_flexeuN(5) }, >>- { EU_PERF_CNTL6, ctx_flexeuN(6) }, >>- }; >>-#undef ctx_flexeuN >> struct intel_engine_cs *engine; >> struct i915_gem_context *ctx, *cn; >>- size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs); >>- int i, err; >>- >>- if (IS_GEN(i915, 12)) { >>- u32 format = stream->oa_buffer.format; >>- >>- regs[1].value = >>- (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | >>- (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); >>- } else { >>- regs[1].value = >>- (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | >>- (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | >>- GEN8_OA_COUNTER_RESUME; >>- } >>- >>- for (i = 2; !!ctx_flexeu0 && i < array_size; i++) >>- regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); >>+ int err; >> lockdep_assert_held(&stream->perf->lock); >>@@ -2353,7 +2323,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, >> spin_unlock(&i915->gem.contexts.lock); >>- err = gen8_configure_context(ctx, regs, array_size); >>+ err = gen8_configure_context(ctx, regs, num_regs); >> if (err) { >> i915_gem_context_put(ctx); >> return err; >>@@ -2378,7 +2348,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, >> regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); >>- err = gen8_modify_self(ce, regs, array_size); >>+ err = gen8_modify_self(ce, regs, num_regs); >> if (err) >> return err; >> } >>@@ -2386,6 +2356,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, >> return 0; >> } >>+static int gen12_configure_all_contexts(struct i915_perf_stream *stream, >>+ const struct i915_oa_config *oa_config) >>+{ >>+ struct flex regs[] = { >>+ { >>+ GEN8_R_PWR_CLK_STATE, >>+ CTX_R_PWR_CLK_STATE, >>+ }, >>+ }; >>+ >>+ return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); >>+} >>+ >>+static int lrc_configure_all_contexts(struct i915_perf_stream *stream, >>+ const struct i915_oa_config *oa_config) >>+{ >>+ /* The MMIO offsets for Flex EU registers aren't contiguous */ >>+ const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; >>+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) >>+ struct flex regs[] = { >>+ { >>+ GEN8_R_PWR_CLK_STATE, >>+ CTX_R_PWR_CLK_STATE, >>+ }, >>+ { >>+ GEN8_OACTXCONTROL, >>+ stream->perf->ctx_oactxctrl_offset + 1, >>+ }, >>+ { EU_PERF_CNTL0, ctx_flexeuN(0) }, >>+ { EU_PERF_CNTL1, ctx_flexeuN(1) }, >>+ { EU_PERF_CNTL2, ctx_flexeuN(2) }, >>+ { EU_PERF_CNTL3, ctx_flexeuN(3) }, >>+ { EU_PERF_CNTL4, ctx_flexeuN(4) }, >>+ { EU_PERF_CNTL5, ctx_flexeuN(5) }, >>+ { EU_PERF_CNTL6, ctx_flexeuN(6) }, >>+ }; >>+#undef ctx_flexeuN >>+ int i; >>+ >>+ regs[1].value = >>+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | >>+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | >>+ GEN8_OA_COUNTER_RESUME; >>+ >>+ for (i = 2; i < ARRAY_SIZE(regs); i++) >>+ regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); >>+ >>+ return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); >>+} >>+ >> static int gen8_enable_metric_set(struct i915_perf_stream *stream) >> { >> struct intel_uncore *uncore = stream->uncore; >>@@ -2469,7 +2489,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) >> * to make sure all slices/subslices are ON before writing to NOA >> * registers. >> */ >>- ret = lrc_configure_all_contexts(stream, oa_config); >>+ ret = gen12_configure_all_contexts(stream, oa_config); >> if (ret) >> return ret; >>@@ -2479,8 +2499,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) >> * requested this. >> */ >> if (stream->ctx) { >>- ret = gen12_emit_oar_config(stream->pinned_ctx, >>- oa_config != NULL); >>+ ret = gen12_configure_oar_context(stream, oa_config != NULL); > > >I think you can assume oa_config is going to be != NULL in the >enable_metric_set vfunc. > > >> if (ret) >> return ret; >> } >>@@ -2514,11 +2533,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) >> struct intel_uncore *uncore = stream->uncore; >> /* Reset all contexts' slices/subslices configurations. */ >>- lrc_configure_all_contexts(stream, NULL); >>+ gen12_configure_all_contexts(stream, NULL); >> /* disable the context save/restore or OAR counters */ >> if (stream->ctx) >>- gen12_emit_oar_config(stream->pinned_ctx, false); >>+ gen12_configure_oar_context(stream, false); >> /* Make sure we disable noa to save power. */ >> intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); >>@@ -2860,7 +2879,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce, >> return; >> stream = engine->i915->perf.exclusive_stream; >>- if (stream) >>+ /* >>+ * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller >>+ * is already doing that, so nothing to be done for gen12 here. >>+ */ >>+ if (stream && INTEL_GEN(stream->perf->i915) < 12) >> gen8_update_reg_state_unlocked(ce, stream); >> } > >
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 221c1090ae93..2f0be5fbef4b 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2078,20 +2078,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce, u32 *reg_state = ce->lrc_reg_state; int i; - if (IS_GEN(stream->perf->i915, 12)) { - u32 format = stream->oa_buffer.format; + reg_state[ctx_oactxctrl + 1] = + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; - reg_state[ctx_oactxctrl + 1] = - (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | - (stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); - } else { - reg_state[ctx_oactxctrl + 1] = - (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME; - } - - for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++) + for (i = 0; i < ARRAY_SIZE(flex_regs); i++) reg_state[ctx_flexeu0 + i * 2 + 1] = oa_config_flex_reg(stream->oa_config, flex_regs[i]); @@ -2224,34 +2216,49 @@ static int gen8_configure_context(struct i915_gem_context *ctx, return err; } -static int gen12_emit_oar_config(struct intel_context *ce, bool enable) +static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable) { - struct i915_request *rq; - u32 *cs; - int err = 0; - - rq = i915_request_create(ce); - if (IS_ERR(rq)) - return PTR_ERR(rq); + int err; + struct intel_context *ce = stream->pinned_ctx; + struct flex regs_context[] = { + { + GEN8_OACTXCONTROL, + stream->perf->ctx_oactxctrl_offset + 1, + enable ? GEN8_OA_COUNTER_RESUME : 0, + }, + }; + struct flex regs_lri[] = { + { + GEN12_OAR_OACONTROL, + }, + { + RING_CONTEXT_CONTROL(ce->engine->mmio_base), + }, + }; + u32 format = stream->oa_buffer.format; - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto out; - } + regs_lri[0].value = + (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | + (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); - *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, - enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); - *cs++ = MI_NOOP; + regs_lri[1].value = + _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, + enable ? + GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : + 0); - intel_ring_advance(rq, cs); + /* Modify the context image of pinned context with regs_context*/ + err = intel_context_lock_pinned(ce); + if (err) + return err; -out: - i915_request_add(rq); + err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context)); + intel_context_unlock_pinned(ce); + if (err) + return err; - return err; + /* Apply regs_lri using LRI with pinned context */ + return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri)); } /* @@ -2277,53 +2284,16 @@ static int gen12_emit_oar_config(struct intel_context *ce, bool enable) * per-context OA state. * * Note: it's only the RCS/Render context that has any OA state. + * Note: the first flex register passed must always be R_PWR_CLK_STATE */ -static int lrc_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config) +static int oa_configure_all_contexts(struct i915_perf_stream *stream, + struct flex *regs, + size_t num_regs) { struct drm_i915_private *i915 = stream->perf->i915; - /* The MMIO offsets for Flex EU registers aren't contiguous */ - const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; -#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) - struct flex regs[] = { - { - GEN8_R_PWR_CLK_STATE, - CTX_R_PWR_CLK_STATE, - }, - { - IS_GEN(i915, 12) ? - GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL, - stream->perf->ctx_oactxctrl_offset + 1, - }, - { EU_PERF_CNTL0, ctx_flexeuN(0) }, - { EU_PERF_CNTL1, ctx_flexeuN(1) }, - { EU_PERF_CNTL2, ctx_flexeuN(2) }, - { EU_PERF_CNTL3, ctx_flexeuN(3) }, - { EU_PERF_CNTL4, ctx_flexeuN(4) }, - { EU_PERF_CNTL5, ctx_flexeuN(5) }, - { EU_PERF_CNTL6, ctx_flexeuN(6) }, - }; -#undef ctx_flexeuN struct intel_engine_cs *engine; struct i915_gem_context *ctx, *cn; - size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs); - int i, err; - - if (IS_GEN(i915, 12)) { - u32 format = stream->oa_buffer.format; - - regs[1].value = - (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | - (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); - } else { - regs[1].value = - (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME; - } - - for (i = 2; !!ctx_flexeu0 && i < array_size; i++) - regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); + int err; lockdep_assert_held(&stream->perf->lock); @@ -2353,7 +2323,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, spin_unlock(&i915->gem.contexts.lock); - err = gen8_configure_context(ctx, regs, array_size); + err = gen8_configure_context(ctx, regs, num_regs); if (err) { i915_gem_context_put(ctx); return err; @@ -2378,7 +2348,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); - err = gen8_modify_self(ce, regs, array_size); + err = gen8_modify_self(ce, regs, num_regs); if (err) return err; } @@ -2386,6 +2356,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, return 0; } +static int gen12_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config) +{ + struct flex regs[] = { + { + GEN8_R_PWR_CLK_STATE, + CTX_R_PWR_CLK_STATE, + }, + }; + + return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); +} + +static int lrc_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config) +{ + /* The MMIO offsets for Flex EU registers aren't contiguous */ + const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; +#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) + struct flex regs[] = { + { + GEN8_R_PWR_CLK_STATE, + CTX_R_PWR_CLK_STATE, + }, + { + GEN8_OACTXCONTROL, + stream->perf->ctx_oactxctrl_offset + 1, + }, + { EU_PERF_CNTL0, ctx_flexeuN(0) }, + { EU_PERF_CNTL1, ctx_flexeuN(1) }, + { EU_PERF_CNTL2, ctx_flexeuN(2) }, + { EU_PERF_CNTL3, ctx_flexeuN(3) }, + { EU_PERF_CNTL4, ctx_flexeuN(4) }, + { EU_PERF_CNTL5, ctx_flexeuN(5) }, + { EU_PERF_CNTL6, ctx_flexeuN(6) }, + }; +#undef ctx_flexeuN + int i; + + regs[1].value = + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; + + for (i = 2; i < ARRAY_SIZE(regs); i++) + regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); + + return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); +} + static int gen8_enable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; @@ -2469,7 +2489,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = lrc_configure_all_contexts(stream, oa_config); + ret = gen12_configure_all_contexts(stream, oa_config); if (ret) return ret; @@ -2479,8 +2499,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) * requested this. */ if (stream->ctx) { - ret = gen12_emit_oar_config(stream->pinned_ctx, - oa_config != NULL); + ret = gen12_configure_oar_context(stream, oa_config != NULL); if (ret) return ret; } @@ -2514,11 +2533,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - lrc_configure_all_contexts(stream, NULL); + gen12_configure_all_contexts(stream, NULL); /* disable the context save/restore or OAR counters */ if (stream->ctx) - gen12_emit_oar_config(stream->pinned_ctx, false); + gen12_configure_oar_context(stream, false); /* Make sure we disable noa to save power. */ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); @@ -2860,7 +2879,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce, return; stream = engine->i915->perf.exclusive_stream; - if (stream) + /* + * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller + * is already doing that, so nothing to be done for gen12 here. + */ + if (stream && INTEL_GEN(stream->perf->i915) < 12) gen8_update_reg_state_unlocked(ce, stream); }
Gen12 supports saving/restoring render counters per context. Apply OAR configuration only for the context that is passed in to perf. v2: - Fix OACTXCONTROL value to only stop/resume counters. - Remove gen12_update_reg_state_unlocked as power state is already applied by the caller. Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/i915/i915_perf.c | 193 +++++++++++++++++-------------- 1 file changed, 108 insertions(+), 85 deletions(-)