diff mbox series

[4/4] drm/i915/pmu: Add support for gen2

Message ID 20241008214349.23331-5-ville.syrjala@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: gen2 stuff | expand

Commit Message

Ville Syrjälä Oct. 8, 2024, 9:43 p.m. UTC
From: Ville Syrjälä <ville.syrjala@linux.intel.com>

Implement pmu support for gen2 so that one can use intel_gpu_top
on it once again.

Gen2 lacks MI_MODE/MODE_IDLE so we'll have to do a bit more work
to determine the state of the engine:
- to determine if the ring contains unconsumed data we can simply
  compare RING_TAIL vs. RING_HEAD
- also check RING_HEAD vs. ACTHD to catch cases where the hardware
  is still executing a batch buffer but the ring head has already
  caught up with the tail. Not entirely sure if that's actually
  possible or not, but maybe it can happen if the batch buffer is
  initiated from the very end of the ring? But even if not strictly
  necessary there's no real harm in checking anyway.
- MI_WAIT_FOR_EVENT can be detected via a dedicated bit in RING_HEAD

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_regs.h |  2 +-
 drivers/gpu/drm/i915/i915_pmu.c             | 32 +++++++++++++++++----
 2 files changed, 27 insertions(+), 7 deletions(-)

Comments

Jani Nikula Oct. 9, 2024, 11:17 a.m. UTC | #1
On Wed, 09 Oct 2024, Ville Syrjala <ville.syrjala@linux.intel.com> wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> Implement pmu support for gen2 so that one can use intel_gpu_top
> on it once again.
>
> Gen2 lacks MI_MODE/MODE_IDLE so we'll have to do a bit more work
> to determine the state of the engine:
> - to determine if the ring contains unconsumed data we can simply
>   compare RING_TAIL vs. RING_HEAD
> - also check RING_HEAD vs. ACTHD to catch cases where the hardware
>   is still executing a batch buffer but the ring head has already
>   caught up with the tail. Not entirely sure if that's actually
>   possible or not, but maybe it can happen if the batch buffer is
>   initiated from the very end of the ring? But even if not strictly
>   necessary there's no real harm in checking anyway.
> - MI_WAIT_FOR_EVENT can be detected via a dedicated bit in RING_HEAD
>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_regs.h |  2 +-
>  drivers/gpu/drm/i915/i915_pmu.c             | 32 +++++++++++++++++----
>  2 files changed, 27 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
> index a8eac59e3779..1c4784cb296c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
> @@ -15,6 +15,7 @@
>  #define   HEAD_WRAP_COUNT			0xFFE00000
>  #define   HEAD_WRAP_ONE				0x00200000
>  #define   HEAD_ADDR				0x001FFFFC
> +#define   HEAD_WAIT_I8XX			(1 << 0) /* gen2, PRBx_HEAD */
>  #define RING_START(base)			_MMIO((base) + 0x38)
>  #define RING_CTL(base)				_MMIO((base) + 0x3c)
>  #define   RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
> @@ -26,7 +27,6 @@
>  #define   RING_VALID_MASK			0x00000001
>  #define   RING_VALID				0x00000001
>  #define   RING_INVALID				0x00000000
> -#define   RING_WAIT_I8XX			(1 << 0) /* gen2, PRBx_HEAD */
>  #define   RING_WAIT				(1 << 11) /* gen3+, PRBx_CTL */
>  #define   RING_WAIT_SEMAPHORE			(1 << 10) /* gen6+ */
>  #define RING_SYNC_0(base)			_MMIO((base) + 0x40)
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 67b6cbdeff1d..2eed4c866321 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -356,7 +356,7 @@ static bool exclusive_mmio_access(const struct drm_i915_private *i915)
>  	return GRAPHICS_VER(i915) == 7;
>  }
>  
> -static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
> +static void engine_sample_gen3(struct intel_engine_cs *engine, unsigned int period_ns)

A bit surprising to see a gen3 suffix rather than a prefix.

Anyway, not going to spend time on looking into gen2 details, but it's
fine,

Acked-by: Jani Nikula <jani.nikula@intel.com>


>  {
>  	struct intel_engine_pmu *pmu = &engine->pmu;
>  	bool busy;
> @@ -391,6 +391,31 @@ static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns
>  		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
>  }
>  
> +static void engine_sample_gen2(struct intel_engine_cs *engine, unsigned int period_ns)
> +{
> +	struct intel_engine_pmu *pmu = &engine->pmu;
> +	u32 tail, head, acthd;
> +
> +	tail = ENGINE_READ_FW(engine, RING_TAIL);
> +	head = ENGINE_READ_FW(engine, RING_HEAD);
> +	acthd = ENGINE_READ_FW(engine, ACTHD);
> +
> +	if (head & HEAD_WAIT_I8XX)
> +		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
> +
> +	if (head & HEAD_WAIT_I8XX || head != acthd ||
> +	    (head & HEAD_ADDR) != (tail & TAIL_ADDR))
> +		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
> +}
> +
> +static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
> +{
> +	if (GRAPHICS_VER(engine->i915) >= 3)
> +		engine_sample_gen3(engine, period_ns);
> +	else
> +		engine_sample_gen2(engine, period_ns);
> +}
> +
>  static void
>  engines_sample(struct intel_gt *gt, unsigned int period_ns)
>  {
> @@ -1243,11 +1268,6 @@ void i915_pmu_register(struct drm_i915_private *i915)
>  
>  	int ret = -ENOMEM;
>  
> -	if (GRAPHICS_VER(i915) <= 2) {
> -		drm_info(&i915->drm, "PMU not supported for this GPU.");
> -		return;
> -	}
> -
>  	spin_lock_init(&pmu->lock);
>  	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>  	pmu->timer.function = i915_sample;
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index a8eac59e3779..1c4784cb296c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -15,6 +15,7 @@ 
 #define   HEAD_WRAP_COUNT			0xFFE00000
 #define   HEAD_WRAP_ONE				0x00200000
 #define   HEAD_ADDR				0x001FFFFC
+#define   HEAD_WAIT_I8XX			(1 << 0) /* gen2, PRBx_HEAD */
 #define RING_START(base)			_MMIO((base) + 0x38)
 #define RING_CTL(base)				_MMIO((base) + 0x3c)
 #define   RING_CTL_SIZE(size)			((size) - PAGE_SIZE) /* in bytes -> pages */
@@ -26,7 +27,6 @@ 
 #define   RING_VALID_MASK			0x00000001
 #define   RING_VALID				0x00000001
 #define   RING_INVALID				0x00000000
-#define   RING_WAIT_I8XX			(1 << 0) /* gen2, PRBx_HEAD */
 #define   RING_WAIT				(1 << 11) /* gen3+, PRBx_CTL */
 #define   RING_WAIT_SEMAPHORE			(1 << 10) /* gen6+ */
 #define RING_SYNC_0(base)			_MMIO((base) + 0x40)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 67b6cbdeff1d..2eed4c866321 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -356,7 +356,7 @@  static bool exclusive_mmio_access(const struct drm_i915_private *i915)
 	return GRAPHICS_VER(i915) == 7;
 }
 
-static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
+static void engine_sample_gen3(struct intel_engine_cs *engine, unsigned int period_ns)
 {
 	struct intel_engine_pmu *pmu = &engine->pmu;
 	bool busy;
@@ -391,6 +391,31 @@  static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns
 		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
 }
 
+static void engine_sample_gen2(struct intel_engine_cs *engine, unsigned int period_ns)
+{
+	struct intel_engine_pmu *pmu = &engine->pmu;
+	u32 tail, head, acthd;
+
+	tail = ENGINE_READ_FW(engine, RING_TAIL);
+	head = ENGINE_READ_FW(engine, RING_HEAD);
+	acthd = ENGINE_READ_FW(engine, ACTHD);
+
+	if (head & HEAD_WAIT_I8XX)
+		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
+
+	if (head & HEAD_WAIT_I8XX || head != acthd ||
+	    (head & HEAD_ADDR) != (tail & TAIL_ADDR))
+		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
+}
+
+static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
+{
+	if (GRAPHICS_VER(engine->i915) >= 3)
+		engine_sample_gen3(engine, period_ns);
+	else
+		engine_sample_gen2(engine, period_ns);
+}
+
 static void
 engines_sample(struct intel_gt *gt, unsigned int period_ns)
 {
@@ -1243,11 +1268,6 @@  void i915_pmu_register(struct drm_i915_private *i915)
 
 	int ret = -ENOMEM;
 
-	if (GRAPHICS_VER(i915) <= 2) {
-		drm_info(&i915->drm, "PMU not supported for this GPU.");
-		return;
-	}
-
 	spin_lock_init(&pmu->lock);
 	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	pmu->timer.function = i915_sample;