Message ID | 20191018005028.47441-2-umesh.nerlige.ramappa@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2,1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers | expand |
On 18/10/2019 03:50, Umesh Nerlige Ramappa wrote: > From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > > We want this so we can preempt performance queries and keep the system > responsive even when long running queries are ongoing. We avoid doing > it for all contexts. > > v2: use LRI to modify context control (Chris) > v3: use MASKED_FIELD to program just the masked bits (Chris) > v4: reuse request created during emit_oa_config (Lionel) > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Looks good. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_lrc.h | 1 + > drivers/gpu/drm/i915/i915_perf.c | 35 ++++++++++++++++++++++++++++- > 2 files changed, 35 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h > index 99dc576a4e25..b6daac712c9e 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.h > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h > @@ -43,6 +43,7 @@ struct intel_engine_cs; > #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) > #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) > #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) > +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) > #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) > #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) > #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 91707558a0f5..ce97af484a32 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -1862,14 +1862,36 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) > return i915_vma_get(oa_bo->vma); > } > > +static int gen12_emit_oar_config(struct i915_request *rq, > + struct intel_context *ce, > + bool enable) > +{ > + u32 *cs; > + > + cs = 
intel_ring_begin(rq, 4); > + if (IS_ERR(cs)) > + return PTR_ERR(cs); > + > + *cs++ = MI_LOAD_REGISTER_IMM(1); > + *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); > + *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, > + enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); > + *cs++ = MI_NOOP; > + > + intel_ring_advance(rq, cs); > + > + return 0; > +} > + > static int emit_oa_config(struct i915_perf_stream *stream, > struct intel_context *ce) > { > + struct i915_oa_config *oa_config = stream->oa_config; > struct i915_request *rq; > struct i915_vma *vma; > int err; > > - vma = get_oa_vma(stream, stream->oa_config); > + vma = get_oa_vma(stream, oa_config); > if (IS_ERR(vma)) > return PTR_ERR(vma); > > @@ -1891,6 +1913,17 @@ static int emit_oa_config(struct i915_perf_stream *stream, > if (err) > goto err_add_request; > > + /* > + * For Gen12, performance counters are context saved/restored. > + * Only enable it for the context that requested this. > + */ > + if (ce == stream->pinned_ctx && IS_GEN(stream->perf->i915, 12)) { > + err = gen12_emit_oar_config(rq, ce, oa_config != NULL); > + if (err) > + goto err_add_request; > + > + } > + > err = rq->engine->emit_bb_start(rq, > vma->node.start, 0, > I915_DISPATCH_SECURE);
Quoting Umesh Nerlige Ramappa (2019-10-18 01:50:27) > From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > > We want this so we can preempt performance queries and keep the system > responsive even when long running queries are ongoing. We avoid doing > it for all contexts. > > v2: use LRI to modify context control (Chris) > v3: use MASKED_FIELD to program just the masked bits (Chris) > v4: reuse request created during emit_oa_config (Lionel) > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_lrc.h | 1 + > drivers/gpu/drm/i915/i915_perf.c | 35 ++++++++++++++++++++++++++++- > 2 files changed, 35 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h > index 99dc576a4e25..b6daac712c9e 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.h > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h > @@ -43,6 +43,7 @@ struct intel_engine_cs; > #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) > #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) > #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) > +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) > #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) > #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) > #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 91707558a0f5..ce97af484a32 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -1862,14 +1862,36 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) > return i915_vma_get(oa_bo->vma); > } > > +static int gen12_emit_oar_config(struct i915_request *rq, > + struct intel_context *ce, > + bool enable) > +{ > + u32 *cs; > + > + cs = intel_ring_begin(rq, 4); > + if (IS_ERR(cs)) > + return PTR_ERR(cs); > + > + *cs++ = 
MI_LOAD_REGISTER_IMM(1); > + *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); > + *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, > + enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); > + *cs++ = MI_NOOP; > + > + intel_ring_advance(rq, cs); > + > + return 0; > +} > + > static int emit_oa_config(struct i915_perf_stream *stream, > struct intel_context *ce) > { > + struct i915_oa_config *oa_config = stream->oa_config; > struct i915_request *rq; > struct i915_vma *vma; > int err; > > - vma = get_oa_vma(stream, stream->oa_config); > + vma = get_oa_vma(stream, oa_config); > if (IS_ERR(vma)) > return PTR_ERR(vma); > > @@ -1891,6 +1913,17 @@ static int emit_oa_config(struct i915_perf_stream *stream, > if (err) > goto err_add_request; > > + /* > + * For Gen12, performance counters are context saved/restored. > + * Only enable it for the context that requested this. > + */ > + if (ce == stream->pinned_ctx && IS_GEN(stream->perf->i915, 12)) { > + err = gen12_emit_oar_config(rq, ce, oa_config != NULL); > + if (err) > + goto err_add_request; > + > + } Do we need to do emit_oa_config(stream, NULL) in gen8_disable_metric_set()? -Chris --------------------------------------------------------------------- Intel Corporation (UK) Limited Registered No. 1134945 (England) Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47 This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.
On 19/10/2019 02:22, Chris Wilson wrote: > Quoting Umesh Nerlige Ramappa (2019-10-18 01:50:27) >> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> >> We want this so we can preempt performance queries and keep the system >> responsive even when long running queries are ongoing. We avoid doing >> it for all contexts. >> >> v2: use LRI to modify context control (Chris) >> v3: use MASKED_FIELD to program just the masked bits (Chris) >> v4: reuse request created during emit_oa_config (Lionel) >> >> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> >> --- >> drivers/gpu/drm/i915/gt/intel_lrc.h | 1 + >> drivers/gpu/drm/i915/i915_perf.c | 35 ++++++++++++++++++++++++++++- >> 2 files changed, 35 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h >> index 99dc576a4e25..b6daac712c9e 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h >> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h >> @@ -43,6 +43,7 @@ struct intel_engine_cs; >> #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) >> #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) >> #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) >> +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) >> #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) >> #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) >> #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >> index 91707558a0f5..ce97af484a32 100644 >> --- a/drivers/gpu/drm/i915/i915_perf.c >> +++ b/drivers/gpu/drm/i915/i915_perf.c >> @@ -1862,14 +1862,36 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) >> return i915_vma_get(oa_bo->vma); >> } >> >> +static int gen12_emit_oar_config(struct i915_request *rq, >> + struct intel_context *ce, >> + bool enable) >> +{ >> + u32 *cs; >> + >> + cs = 
intel_ring_begin(rq, 4); >> + if (IS_ERR(cs)) >> + return PTR_ERR(cs); >> + >> + *cs++ = MI_LOAD_REGISTER_IMM(1); >> + *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); >> + *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, >> + enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); >> + *cs++ = MI_NOOP; >> + >> + intel_ring_advance(rq, cs); >> + >> + return 0; >> +} >> + >> static int emit_oa_config(struct i915_perf_stream *stream, >> struct intel_context *ce) >> { >> + struct i915_oa_config *oa_config = stream->oa_config; >> struct i915_request *rq; >> struct i915_vma *vma; >> int err; >> >> - vma = get_oa_vma(stream, stream->oa_config); >> + vma = get_oa_vma(stream, oa_config); >> if (IS_ERR(vma)) >> return PTR_ERR(vma); >> >> @@ -1891,6 +1913,17 @@ static int emit_oa_config(struct i915_perf_stream *stream, >> if (err) >> goto err_add_request; >> >> + /* >> + * For Gen12, performance counters are context saved/restored. >> + * Only enable it for the context that requested this. >> + */ >> + if (ce == stream->pinned_ctx && IS_GEN(stream->perf->i915, 12)) { >> + err = gen12_emit_oar_config(rq, ce, oa_config != NULL); >> + if (err) >> + goto err_add_request; >> + >> + } > Do we need to do emit_oa_config(stream, NULL) in > gen8_disable_metric_set()? > -Chris > Oh crap... I must have pointed to the wrong function then... I thought this was called from enable/disable_metric_set(). I guess the best thing would be to call this from gen12_enable/disable_metric_set(). But then we would need to create a request for the pinned_ctx. Sorry :( -Lionel
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 99dc576a4e25..b6daac712c9e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -43,6 +43,7 @@ struct intel_engine_cs; #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 91707558a0f5..ce97af484a32 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1862,14 +1862,36 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) return i915_vma_get(oa_bo->vma); } +static int gen12_emit_oar_config(struct i915_request *rq, + struct intel_context *ce, + bool enable) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base)); + *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, + enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0); + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + return 0; +} + static int emit_oa_config(struct i915_perf_stream *stream, struct intel_context *ce) { + struct i915_oa_config *oa_config = stream->oa_config; struct i915_request *rq; struct i915_vma *vma; int err; - vma = get_oa_vma(stream, stream->oa_config); + vma = get_oa_vma(stream, oa_config); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -1891,6 +1913,17 @@ static int emit_oa_config(struct i915_perf_stream *stream, if (err) goto err_add_request; + /* + * For Gen12, performance counters are context saved/restored. 
+ * Only enable it for the context that requested this. + */ + if (ce == stream->pinned_ctx && IS_GEN(stream->perf->i915, 12)) { + err = gen12_emit_oar_config(rq, ce, oa_config != NULL); + if (err) + goto err_add_request; + + } + err = rq->engine->emit_bb_start(rq, vma->node.start, 0, I915_DISPATCH_SECURE);