diff mbox series

[05/19] drm/i915/perf: Enable commands per clock reporting in OA

Message ID 20220823204155.8178-6-umesh.nerlige.ramappa@intel.com (mailing list archive)
State New, archived
Headers show
Series Add DG2 OA support | expand

Commit Message

Umesh Nerlige Ramappa Aug. 23, 2022, 8:41 p.m. UTC
XEHPSDV and DG2 provide a way to configure bytes per clock vs commands
per clock reporting. Enable command per clock setting on enabling OA.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  3 +++
 drivers/gpu/drm/i915/i915_pci.c          |  1 +
 drivers/gpu/drm/i915/i915_perf.c         | 20 ++++++++++++++++++++
 drivers/gpu/drm/i915/i915_perf_oa_regs.h |  4 ++++
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 5 files changed, 29 insertions(+)

Comments

Lionel Landwerlin Sept. 6, 2022, 7:51 p.m. UTC | #1
On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
> XEHPSDV and DG2 provide a way to configure bytes per clock vs commands
> per clock reporting. Enable command per clock setting on enabling OA.
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          |  3 +++
>   drivers/gpu/drm/i915/i915_pci.c          |  1 +
>   drivers/gpu/drm/i915/i915_perf.c         | 20 ++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_perf_oa_regs.h |  4 ++++
>   drivers/gpu/drm/i915/intel_device_info.h |  1 +
>   5 files changed, 29 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b4733c5a01da..b2e8a44bd976 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1287,6 +1287,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>   #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
>   #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc)
>   
> +#define HAS_OA_BPC_REPORTING(dev_priv) \
> +	(INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
> +
>   /*
>    * Set this flag, when platform requires 64K GTT page sizes or larger for
>    * device local memory access.
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index d8446bb25d5e..bd0b8502b91e 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1019,6 +1019,7 @@ static const struct intel_device_info adl_p_info = {
>   	.has_logical_ring_contexts = 1, \
>   	.has_logical_ring_elsq = 1, \
>   	.has_mslice_steering = 1, \
> +	.has_oa_bpc_reporting = 1, \
>   	.has_rc6 = 1, \
>   	.has_reset_engine = 1, \
>   	.has_rps = 1, \
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index efa7eda83edd..6fc4f0d8fc5a 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -2745,10 +2745,12 @@ static int
>   gen12_enable_metric_set(struct i915_perf_stream *stream,
>   			struct i915_active *active)
>   {
> +	struct drm_i915_private *i915 = stream->perf->i915;
>   	struct intel_uncore *uncore = stream->uncore;
>   	struct i915_oa_config *oa_config = stream->oa_config;
>   	bool periodic = stream->periodic;
>   	u32 period_exponent = stream->period_exponent;
> +	u32 sqcnt1;
>   	int ret;
>   
>   	intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
> @@ -2767,6 +2769,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
>   			    (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
>   			    : 0);
>   
> + 	/*
> + 	 * Initialize Super Queue Internal Cnt Register
> + 	 * Set PMON Enable in order to collect valid metrics.
> +	 * Enable commands per clock reporting in OA for XEHPSDV onward.
> + 	 */
> +	sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
> +		 (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
> +
> +	intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
> +
>   	/*
>   	 * Update all contexts prior writing the mux configurations as we need
>   	 * to make sure all slices/subslices are ON before writing to NOA
> @@ -2816,6 +2828,8 @@ static void gen11_disable_metric_set(struct i915_perf_stream *stream)
>   static void gen12_disable_metric_set(struct i915_perf_stream *stream)
>   {
>   	struct intel_uncore *uncore = stream->uncore;
> +	struct drm_i915_private *i915 = stream->perf->i915;
> +	u32 sqcnt1;
>   
>   	/* Reset all contexts' slices/subslices configurations. */
>   	gen12_configure_all_contexts(stream, NULL, NULL);
> @@ -2826,6 +2840,12 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
>   
>   	/* Make sure we disable noa to save power. */
>   	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
> +
> +	sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
> +		 (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
> +
> + 	/* Reset PMON Enable to save power. */
> +	intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
>   }
>   
>   static void gen7_oa_enable(struct i915_perf_stream *stream)
> diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> index 0ef3562ff4aa..381d94101610 100644
> --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> @@ -134,4 +134,8 @@
>   #define GDT_CHICKEN_BITS    _MMIO(0x9840)
>   #define   GT_NOA_ENABLE	    0x00000080
>   
> +#define GEN12_SQCNT1				_MMIO(0x8718)
> +#define   GEN12_SQCNT1_PMON_ENABLE		REG_BIT(30)
> +#define   GEN12_SQCNT1_OABPC			REG_BIT(29)
> +
>   #endif /* __INTEL_PERF_OA_REGS__ */
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 23bf230aa104..fc2a0660426e 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -163,6 +163,7 @@ enum intel_ppgtt_type {
>   	func(has_logical_ring_elsq); \
>   	func(has_media_ratio_mode); \
>   	func(has_mslice_steering); \
> +	func(has_oa_bpc_reporting); \
>   	func(has_one_eu_per_fuse_bit); \
>   	func(has_pooled_eu); \
>   	func(has_pxp); \
Dixit, Ashutosh Sept. 14, 2022, 12:19 a.m. UTC | #2
On Tue, 23 Aug 2022 13:41:41 -0700, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> XEHPSDV and DG2 provide a way to configure bytes per clock vs commands
> per clock reporting. Enable command per clock setting on enabling OA.

What is the reason for selecting commands per clock vs bytes per clock?
Also probably mention Bspec: 51762 in the commit message too.

> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index efa7eda83edd..6fc4f0d8fc5a 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -2745,10 +2745,12 @@ static int
>  gen12_enable_metric_set(struct i915_perf_stream *stream,
>			struct i915_active *active)
>  {
> +	struct drm_i915_private *i915 = stream->perf->i915;
>	struct intel_uncore *uncore = stream->uncore;
>	struct i915_oa_config *oa_config = stream->oa_config;
>	bool periodic = stream->periodic;
>	u32 period_exponent = stream->period_exponent;
> +	u32 sqcnt1;
>	int ret;
>
>	intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
> @@ -2767,6 +2769,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
>			    (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
>			    : 0);
>
> +	/*
> +	 * Initialize Super Queue Internal Cnt Register
> +	 * Set PMON Enable in order to collect valid metrics.
> +	 * Enable commands per clock reporting in OA for XEHPSDV onward.
> +	 */
> +	sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
> +		 (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);

Also from Bspec 0:Unitsof4cmd and 1:Unitsof128B so looks like bit 29 should
be set to 0 for commands per clock setting? Or I am wrong?

> +
> +	intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
> +
>	/*
>	 * Update all contexts prior writing the mux configurations as we need
>	 * to make sure all slices/subslices are ON before writing to NOA
> @@ -2816,6 +2828,8 @@ static void gen11_disable_metric_set(struct i915_perf_stream *stream)
>  static void gen12_disable_metric_set(struct i915_perf_stream *stream)
>  {
>	struct intel_uncore *uncore = stream->uncore;
> +	struct drm_i915_private *i915 = stream->perf->i915;
> +	u32 sqcnt1;
>
>	/* Reset all contexts' slices/subslices configurations. */
>	gen12_configure_all_contexts(stream, NULL, NULL);
> @@ -2826,6 +2840,12 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
>
>	/* Make sure we disable noa to save power. */
>	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
> +
> +	sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
> +		 (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
> +
> +	/* Reset PMON Enable to save power. */
> +	intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
>  }
>
>  static void gen7_oa_enable(struct i915_perf_stream *stream)
> diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> index 0ef3562ff4aa..381d94101610 100644
> --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> @@ -134,4 +134,8 @@
>  #define GDT_CHICKEN_BITS    _MMIO(0x9840)
>  #define   GT_NOA_ENABLE	    0x00000080
>
> +#define GEN12_SQCNT1				_MMIO(0x8718)
> +#define   GEN12_SQCNT1_PMON_ENABLE		REG_BIT(30)
> +#define   GEN12_SQCNT1_OABPC			REG_BIT(29)
> +
>  #endif /* __INTEL_PERF_OA_REGS__ */
Umesh Nerlige Ramappa Sept. 15, 2022, 12:04 a.m. UTC | #3
On Tue, Sep 13, 2022 at 05:19:24PM -0700, Dixit, Ashutosh wrote:
>On Tue, 23 Aug 2022 13:41:41 -0700, Umesh Nerlige Ramappa wrote:
>>
>
>Hi Umesh,
>
>> XEHPSDV and DG2 provide a way to configure bytes per clock vs commands
>> per clock reporting. Enable command per clock setting on enabling OA.

should be: Enable bytes per clock setting
>
>What is the reason for selecting commands per clock vs bytes per clock?
>Also probably mention Bspec: 51762 in the commit message too.

It's a default configuration used to interpret the A36/A37 counters here 
- Bspec: 52201

>
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index efa7eda83edd..6fc4f0d8fc5a 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -2745,10 +2745,12 @@ static int
>>  gen12_enable_metric_set(struct i915_perf_stream *stream,
>>			struct i915_active *active)
>>  {
>> +	struct drm_i915_private *i915 = stream->perf->i915;
>>	struct intel_uncore *uncore = stream->uncore;
>>	struct i915_oa_config *oa_config = stream->oa_config;
>>	bool periodic = stream->periodic;
>>	u32 period_exponent = stream->period_exponent;
>> +	u32 sqcnt1;
>>	int ret;
>>
>>	intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
>> @@ -2767,6 +2769,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
>>			    (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
>>			    : 0);
>>
>> +	/*
>> +	 * Initialize Super Queue Internal Cnt Register
>> +	 * Set PMON Enable in order to collect valid metrics.
>> +	 * Enable commands per clock reporting in OA for XEHPSDV onward.
>> +	 */
>> +	sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
>> +		 (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
>
>Also from Bspec 0:Unitsof4cmd and 1:Unitsof128B so looks like bit 29 should
>be set to 0 for commands per clock setting? Or I am wrong?

I know bit 29 has to be set for DG2. I think the commit message is 
wrong. Nice catch, thanks

>
>> +
>> +	intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
>> +
>>	/*
>>	 * Update all contexts prior writing the mux configurations as we need
>>	 * to make sure all slices/subslices are ON before writing to NOA
>> @@ -2816,6 +2828,8 @@ static void gen11_disable_metric_set(struct i915_perf_stream *stream)
>>  static void gen12_disable_metric_set(struct i915_perf_stream *stream)
>>  {
>>	struct intel_uncore *uncore = stream->uncore;
>> +	struct drm_i915_private *i915 = stream->perf->i915;
>> +	u32 sqcnt1;
>>
>>	/* Reset all contexts' slices/subslices configurations. */
>>	gen12_configure_all_contexts(stream, NULL, NULL);
>> @@ -2826,6 +2840,12 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
>>
>>	/* Make sure we disable noa to save power. */
>>	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
>> +
>> +	sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
>> +		 (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
>> +
>> +	/* Reset PMON Enable to save power. */
>> +	intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
>>  }
>>
>>  static void gen7_oa_enable(struct i915_perf_stream *stream)
>> diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
>> index 0ef3562ff4aa..381d94101610 100644
>> --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
>> +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
>> @@ -134,4 +134,8 @@
>>  #define GDT_CHICKEN_BITS    _MMIO(0x9840)
>>  #define   GT_NOA_ENABLE	    0x00000080
>>
>> +#define GEN12_SQCNT1				_MMIO(0x8718)
>> +#define   GEN12_SQCNT1_PMON_ENABLE		REG_BIT(30)
>> +#define   GEN12_SQCNT1_OABPC			REG_BIT(29)
>> +
>>  #endif /* __INTEL_PERF_OA_REGS__ */
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b4733c5a01da..b2e8a44bd976 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1287,6 +1287,9 @@  IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
 #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc)
 
+#define HAS_OA_BPC_REPORTING(dev_priv) \
+	(INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
+
 /*
  * Set this flag, when platform requires 64K GTT page sizes or larger for
  * device local memory access.
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index d8446bb25d5e..bd0b8502b91e 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1019,6 +1019,7 @@  static const struct intel_device_info adl_p_info = {
 	.has_logical_ring_contexts = 1, \
 	.has_logical_ring_elsq = 1, \
 	.has_mslice_steering = 1, \
+	.has_oa_bpc_reporting = 1, \
 	.has_rc6 = 1, \
 	.has_reset_engine = 1, \
 	.has_rps = 1, \
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index efa7eda83edd..6fc4f0d8fc5a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2745,10 +2745,12 @@  static int
 gen12_enable_metric_set(struct i915_perf_stream *stream,
 			struct i915_active *active)
 {
+	struct drm_i915_private *i915 = stream->perf->i915;
 	struct intel_uncore *uncore = stream->uncore;
 	struct i915_oa_config *oa_config = stream->oa_config;
 	bool periodic = stream->periodic;
 	u32 period_exponent = stream->period_exponent;
+	u32 sqcnt1;
 	int ret;
 
 	intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
@@ -2767,6 +2769,16 @@  gen12_enable_metric_set(struct i915_perf_stream *stream,
 			    (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
 			    : 0);
 
+ 	/*
+ 	 * Initialize Super Queue Internal Cnt Register
+ 	 * Set PMON Enable in order to collect valid metrics.
+	 * Enable commands per clock reporting in OA for XEHPSDV onward.
+ 	 */
+	sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+		 (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+	intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
+
 	/*
 	 * Update all contexts prior writing the mux configurations as we need
 	 * to make sure all slices/subslices are ON before writing to NOA
@@ -2816,6 +2828,8 @@  static void gen11_disable_metric_set(struct i915_perf_stream *stream)
 static void gen12_disable_metric_set(struct i915_perf_stream *stream)
 {
 	struct intel_uncore *uncore = stream->uncore;
+	struct drm_i915_private *i915 = stream->perf->i915;
+	u32 sqcnt1;
 
 	/* Reset all contexts' slices/subslices configurations. */
 	gen12_configure_all_contexts(stream, NULL, NULL);
@@ -2826,6 +2840,12 @@  static void gen12_disable_metric_set(struct i915_perf_stream *stream)
 
 	/* Make sure we disable noa to save power. */
 	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
+
+	sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+		 (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+ 	/* Reset PMON Enable to save power. */
+	intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
 }
 
 static void gen7_oa_enable(struct i915_perf_stream *stream)
diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
index 0ef3562ff4aa..381d94101610 100644
--- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
+++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
@@ -134,4 +134,8 @@ 
 #define GDT_CHICKEN_BITS    _MMIO(0x9840)
 #define   GT_NOA_ENABLE	    0x00000080
 
+#define GEN12_SQCNT1				_MMIO(0x8718)
+#define   GEN12_SQCNT1_PMON_ENABLE		REG_BIT(30)
+#define   GEN12_SQCNT1_OABPC			REG_BIT(29)
+
 #endif /* __INTEL_PERF_OA_REGS__ */
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 23bf230aa104..fc2a0660426e 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -163,6 +163,7 @@  enum intel_ppgtt_type {
 	func(has_logical_ring_elsq); \
 	func(has_media_ratio_mode); \
 	func(has_mslice_steering); \
+	func(has_oa_bpc_reporting); \
 	func(has_one_eu_per_fuse_bit); \
 	func(has_pooled_eu); \
 	func(has_pxp); \