Message ID | 20220823204155.8178-6-umesh.nerlige.ramappa@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add DG2 OA support | expand |
On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote: > XEHPSDV and DG2 provide a way to configure bytes per clock vs commands > per clock reporting. Enable command per clock setting on enabling OA. > > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 3 +++ > drivers/gpu/drm/i915/i915_pci.c | 1 + > drivers/gpu/drm/i915/i915_perf.c | 20 ++++++++++++++++++++ > drivers/gpu/drm/i915/i915_perf_oa_regs.h | 4 ++++ > drivers/gpu/drm/i915/intel_device_info.h | 1 + > 5 files changed, 29 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index b4733c5a01da..b2e8a44bd976 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1287,6 +1287,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, > #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) > #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc) > > +#define HAS_OA_BPC_REPORTING(dev_priv) \ > + (INTEL_INFO(dev_priv)->has_oa_bpc_reporting) > + > /* > * Set this flag, when platform requires 64K GTT page sizes or larger for > * device local memory access. 
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > index d8446bb25d5e..bd0b8502b91e 100644 > --- a/drivers/gpu/drm/i915/i915_pci.c > +++ b/drivers/gpu/drm/i915/i915_pci.c > @@ -1019,6 +1019,7 @@ static const struct intel_device_info adl_p_info = { > .has_logical_ring_contexts = 1, \ > .has_logical_ring_elsq = 1, \ > .has_mslice_steering = 1, \ > + .has_oa_bpc_reporting = 1, \ > .has_rc6 = 1, \ > .has_reset_engine = 1, \ > .has_rps = 1, \ > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index efa7eda83edd..6fc4f0d8fc5a 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -2745,10 +2745,12 @@ static int > gen12_enable_metric_set(struct i915_perf_stream *stream, > struct i915_active *active) > { > + struct drm_i915_private *i915 = stream->perf->i915; > struct intel_uncore *uncore = stream->uncore; > struct i915_oa_config *oa_config = stream->oa_config; > bool periodic = stream->periodic; > u32 period_exponent = stream->period_exponent; > + u32 sqcnt1; > int ret; > > intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, > @@ -2767,6 +2769,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, > (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) > : 0); > > + /* > + * Initialize Super Queue Internal Cnt Register > + * Set PMON Enable in order to collect valid metrics. > + * Enable commands per clock reporting in OA for XEHPSDV onward. > + */ > + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE | > + (HAS_OA_BPC_REPORTING(i915) ? 
GEN12_SQCNT1_OABPC : 0); > + > + intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1); > + > /* > * Update all contexts prior writing the mux configurations as we need > * to make sure all slices/subslices are ON before writing to NOA > @@ -2816,6 +2828,8 @@ static void gen11_disable_metric_set(struct i915_perf_stream *stream) > static void gen12_disable_metric_set(struct i915_perf_stream *stream) > { > struct intel_uncore *uncore = stream->uncore; > + struct drm_i915_private *i915 = stream->perf->i915; > + u32 sqcnt1; > > /* Reset all contexts' slices/subslices configurations. */ > gen12_configure_all_contexts(stream, NULL, NULL); > @@ -2826,6 +2840,12 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) > > /* Make sure we disable noa to save power. */ > intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); > + > + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE | > + (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0); > + > + /* Reset PMON Enable to save power. */ > + intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0); > } > > static void gen7_oa_enable(struct i915_perf_stream *stream) > diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h > index 0ef3562ff4aa..381d94101610 100644 > --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h > +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h > @@ -134,4 +134,8 @@ > #define GDT_CHICKEN_BITS _MMIO(0x9840) > #define GT_NOA_ENABLE 0x00000080 > > +#define GEN12_SQCNT1 _MMIO(0x8718) > +#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30) > +#define GEN12_SQCNT1_OABPC REG_BIT(29) > + > #endif /* __INTEL_PERF_OA_REGS__ */ > diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h > index 23bf230aa104..fc2a0660426e 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.h > +++ b/drivers/gpu/drm/i915/intel_device_info.h > @@ -163,6 +163,7 @@ enum intel_ppgtt_type { > func(has_logical_ring_elsq); \ > func(has_media_ratio_mode); \ > 
func(has_mslice_steering); \ > + func(has_oa_bpc_reporting); \ > func(has_one_eu_per_fuse_bit); \ > func(has_pooled_eu); \ > func(has_pxp); \
On Tue, 23 Aug 2022 13:41:41 -0700, Umesh Nerlige Ramappa wrote: > Hi Umesh, > XEHPSDV and DG2 provide a way to configure bytes per clock vs commands > per clock reporting. Enable command per clock setting on enabling OA. What is the reason for selecting commands per clock vs bytes per clock? Also probably mention Bspec: 51762 in the commit message too. > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index efa7eda83edd..6fc4f0d8fc5a 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -2745,10 +2745,12 @@ static int > gen12_enable_metric_set(struct i915_perf_stream *stream, > struct i915_active *active) > { > + struct drm_i915_private *i915 = stream->perf->i915; > struct intel_uncore *uncore = stream->uncore; > struct i915_oa_config *oa_config = stream->oa_config; > bool periodic = stream->periodic; > u32 period_exponent = stream->period_exponent; > + u32 sqcnt1; > int ret; > > intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, > @@ -2767,6 +2769,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, > (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) > : 0); > > + /* > + * Initialize Super Queue Internal Cnt Register > + * Set PMON Enable in order to collect valid metrics. > + * Enable commands per clock reporting in OA for XEHPSDV onward. > + */ > + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE | > + (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0); Also, from Bspec: "0: Units of 4 cmd" and "1: Units of 128B", so it looks like bit 29 should be set to 0 for the commands per clock setting? Or am I wrong? 
> + > + intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1); > + > /* > * Update all contexts prior writing the mux configurations as we need > * to make sure all slices/subslices are ON before writing to NOA > @@ -2816,6 +2828,8 @@ static void gen11_disable_metric_set(struct i915_perf_stream *stream) > static void gen12_disable_metric_set(struct i915_perf_stream *stream) > { > struct intel_uncore *uncore = stream->uncore; > + struct drm_i915_private *i915 = stream->perf->i915; > + u32 sqcnt1; > > /* Reset all contexts' slices/subslices configurations. */ > gen12_configure_all_contexts(stream, NULL, NULL); > @@ -2826,6 +2840,12 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) > > /* Make sure we disable noa to save power. */ > intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); > + > + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE | > + (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0); > + > + /* Reset PMON Enable to save power. */ > + intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0); > } > > static void gen7_oa_enable(struct i915_perf_stream *stream) > diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h > index 0ef3562ff4aa..381d94101610 100644 > --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h > +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h > @@ -134,4 +134,8 @@ > #define GDT_CHICKEN_BITS _MMIO(0x9840) > #define GT_NOA_ENABLE 0x00000080 > > +#define GEN12_SQCNT1 _MMIO(0x8718) > +#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30) > +#define GEN12_SQCNT1_OABPC REG_BIT(29) > + > #endif /* __INTEL_PERF_OA_REGS__ */
On Tue, Sep 13, 2022 at 05:19:24PM -0700, Dixit, Ashutosh wrote: >On Tue, 23 Aug 2022 13:41:41 -0700, Umesh Nerlige Ramappa wrote: >> > >Hi Umesh, > >> XEHPSDV and DG2 provide a way to configure bytes per clock vs commands >> per clock reporting. Enable command per clock setting on enabling OA. should be: Enable bytes per clock setting > >What is the reason for selecting commands per clock vs bytes per clock? >Also probably mention Bspec: 51762 in the commit message too. It's a default configuration used to interpret the A36/A37 counters here - Bspec: 52201 > >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >> index efa7eda83edd..6fc4f0d8fc5a 100644 >> --- a/drivers/gpu/drm/i915/i915_perf.c >> +++ b/drivers/gpu/drm/i915/i915_perf.c >> @@ -2745,10 +2745,12 @@ static int >> gen12_enable_metric_set(struct i915_perf_stream *stream, >> struct i915_active *active) >> { >> + struct drm_i915_private *i915 = stream->perf->i915; >> struct intel_uncore *uncore = stream->uncore; >> struct i915_oa_config *oa_config = stream->oa_config; >> bool periodic = stream->periodic; >> u32 period_exponent = stream->period_exponent; >> + u32 sqcnt1; >> int ret; >> >> intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, >> @@ -2767,6 +2769,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, >> (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) >> : 0); >> >> + /* >> + * Initialize Super Queue Internal Cnt Register >> + * Set PMON Enable in order to collect valid metrics. >> + * Enable commands per clock reporting in OA for XEHPSDV onward. >> + */ >> + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE | >> + (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0); > >Also from Bspec 0:Unitsof4cmd and 1:Unitsof128B so looks like bit 29 should >be set to 0 for commands per clock setting? Or I am wrong? I know bit 29 has to be set for DG2. I think the commit message is wrong. 
Nice catch, thanks > >> + >> + intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1); >> + >> /* >> * Update all contexts prior writing the mux configurations as we need >> * to make sure all slices/subslices are ON before writing to NOA >> @@ -2816,6 +2828,8 @@ static void gen11_disable_metric_set(struct i915_perf_stream *stream) >> static void gen12_disable_metric_set(struct i915_perf_stream *stream) >> { >> struct intel_uncore *uncore = stream->uncore; >> + struct drm_i915_private *i915 = stream->perf->i915; >> + u32 sqcnt1; >> >> /* Reset all contexts' slices/subslices configurations. */ >> gen12_configure_all_contexts(stream, NULL, NULL); >> @@ -2826,6 +2840,12 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) >> >> /* Make sure we disable noa to save power. */ >> intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); >> + >> + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE | >> + (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0); >> + >> + /* Reset PMON Enable to save power. */ >> + intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0); >> } >> >> static void gen7_oa_enable(struct i915_perf_stream *stream) >> diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h >> index 0ef3562ff4aa..381d94101610 100644 >> --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h >> +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h >> @@ -134,4 +134,8 @@ >> #define GDT_CHICKEN_BITS _MMIO(0x9840) >> #define GT_NOA_ENABLE 0x00000080 >> >> +#define GEN12_SQCNT1 _MMIO(0x8718) >> +#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30) >> +#define GEN12_SQCNT1_OABPC REG_BIT(29) >> + >> #endif /* __INTEL_PERF_OA_REGS__ */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b4733c5a01da..b2e8a44bd976 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1287,6 +1287,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc) +#define HAS_OA_BPC_REPORTING(dev_priv) \ + (INTEL_INFO(dev_priv)->has_oa_bpc_reporting) + /* * Set this flag, when platform requires 64K GTT page sizes or larger for * device local memory access. diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index d8446bb25d5e..bd0b8502b91e 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1019,6 +1019,7 @@ static const struct intel_device_info adl_p_info = { .has_logical_ring_contexts = 1, \ .has_logical_ring_elsq = 1, \ .has_mslice_steering = 1, \ + .has_oa_bpc_reporting = 1, \ .has_rc6 = 1, \ .has_reset_engine = 1, \ .has_rps = 1, \ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index efa7eda83edd..6fc4f0d8fc5a 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2745,10 +2745,12 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream, struct i915_active *active) { + struct drm_i915_private *i915 = stream->perf->i915; struct intel_uncore *uncore = stream->uncore; struct i915_oa_config *oa_config = stream->oa_config; bool periodic = stream->periodic; u32 period_exponent = stream->period_exponent; + u32 sqcnt1; int ret; intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, @@ -2767,6 +2769,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) : 0); + /* + * Initialize Super Queue Internal Cnt Register + * Set PMON Enable in order to collect valid metrics. 
+ * Enable commands per clock reporting in OA for XEHPSDV onward. + */ + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE | + (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0); + + intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1); + /* * Update all contexts prior writing the mux configurations as we need * to make sure all slices/subslices are ON before writing to NOA @@ -2816,6 +2828,8 @@ static void gen11_disable_metric_set(struct i915_perf_stream *stream) static void gen12_disable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; + struct drm_i915_private *i915 = stream->perf->i915; + u32 sqcnt1; /* Reset all contexts' slices/subslices configurations. */ gen12_configure_all_contexts(stream, NULL, NULL); @@ -2826,6 +2840,12 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) /* Make sure we disable noa to save power. */ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); + + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE | + (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0); + + /* Reset PMON Enable to save power. 
*/ + intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0); } static void gen7_oa_enable(struct i915_perf_stream *stream) diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h index 0ef3562ff4aa..381d94101610 100644 --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h @@ -134,4 +134,8 @@ #define GDT_CHICKEN_BITS _MMIO(0x9840) #define GT_NOA_ENABLE 0x00000080 +#define GEN12_SQCNT1 _MMIO(0x8718) +#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30) +#define GEN12_SQCNT1_OABPC REG_BIT(29) + #endif /* __INTEL_PERF_OA_REGS__ */ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 23bf230aa104..fc2a0660426e 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -163,6 +163,7 @@ enum intel_ppgtt_type { func(has_logical_ring_elsq); \ func(has_media_ratio_mode); \ func(has_mslice_steering); \ + func(has_oa_bpc_reporting); \ func(has_one_eu_per_fuse_bit); \ func(has_pooled_eu); \ func(has_pxp); \
XEHPSDV and DG2 provide a way to configure bytes per clock vs commands per clock reporting. Enable bytes per clock setting on enabling OA. Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/i915_perf.c | 20 ++++++++++++++++++++ drivers/gpu/drm/i915/i915_perf_oa_regs.h | 4 ++++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 5 files changed, 29 insertions(+)