Message ID | 20241008214349.23331-5-ville.syrjala@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: gen2 stuff | expand |
On Wed, 09 Oct 2024, Ville Syrjala <ville.syrjala@linux.intel.com> wrote: > From: Ville Syrjälä <ville.syrjala@linux.intel.com> > > Implement pmu support for gen2 so that one can use intel_gpu_top > on it once again. > > Gen2 lacks MI_MODE/MODE_IDLE so we'll have to do a bit more work > to determine the state of the engine: > - to determine if the ring contains unconsumed data we can simply > compare RING_TAIL vs. RING_HEAD > - also check RING_HEAD vs. ACTHD to catch cases where the hardware > is still executing a batch buffer but the ring head has already > caught up with the tail. Not entirely sure if that's actually > possible or not, but maybe it can happen if the batch buffer is > initiated from the very end of the ring? But even if not strictly > necessary there's no real harm in checking anyway. > - MI_WAIT_FOR_EVENT can be detected via a dedicated bit in RING_HEAD > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> > --- > drivers/gpu/drm/i915/gt/intel_engine_regs.h | 2 +- > drivers/gpu/drm/i915/i915_pmu.c | 32 +++++++++++++++++---- > 2 files changed, 27 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h > index a8eac59e3779..1c4784cb296c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h > +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h > @@ -15,6 +15,7 @@ > #define HEAD_WRAP_COUNT 0xFFE00000 > #define HEAD_WRAP_ONE 0x00200000 > #define HEAD_ADDR 0x001FFFFC > +#define HEAD_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */ > #define RING_START(base) _MMIO((base) + 0x38) > #define RING_CTL(base) _MMIO((base) + 0x3c) > #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ > @@ -26,7 +27,6 @@ > #define RING_VALID_MASK 0x00000001 > #define RING_VALID 0x00000001 > #define RING_INVALID 0x00000000 > -#define RING_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */ > #define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */ > #define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */ > #define RING_SYNC_0(base) _MMIO((base) + 0x40) > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c > index 67b6cbdeff1d..2eed4c866321 100644 > --- a/drivers/gpu/drm/i915/i915_pmu.c > +++ b/drivers/gpu/drm/i915/i915_pmu.c > @@ -356,7 +356,7 @@ static bool exclusive_mmio_access(const struct drm_i915_private *i915) > return GRAPHICS_VER(i915) == 7; > } > > -static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns) > +static void engine_sample_gen3(struct intel_engine_cs *engine, unsigned int period_ns) A bit surprising to see a gen3 suffix rather than a prefix. Anyway, not going to spend time on looking into gen2 details, but it's fine, Acked-by: Jani Nikula <jani.nikula@intel.com> > { > struct intel_engine_pmu *pmu = &engine->pmu; > bool busy; > @@ -391,6 +391,31 @@ static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns > add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); > } > > +static void engine_sample_gen2(struct intel_engine_cs *engine, unsigned int period_ns) > +{ > + struct intel_engine_pmu *pmu = &engine->pmu; > + u32 tail, head, acthd; > + > + tail = ENGINE_READ_FW(engine, RING_TAIL); > + head = ENGINE_READ_FW(engine, RING_HEAD); > + acthd = ENGINE_READ_FW(engine, ACTHD); > + > + if (head & HEAD_WAIT_I8XX) > + add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); > + > + if (head & HEAD_WAIT_I8XX || head != acthd || > + (head & HEAD_ADDR) != (tail & TAIL_ADDR)) > + add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); > +} > + > +static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns) > +{ > + if (GRAPHICS_VER(engine->i915) >= 3) > + engine_sample_gen3(engine, period_ns); > + else > + engine_sample_gen2(engine, period_ns); > +} > + > static void > engines_sample(struct intel_gt *gt, unsigned int period_ns) > { > @@ -1243,11 +1268,6 @@ void i915_pmu_register(struct drm_i915_private *i915) > > int ret = -ENOMEM; > > - if (GRAPHICS_VER(i915) <= 2) { > - drm_info(&i915->drm, "PMU not supported for this GPU."); > - return; > - } > - > spin_lock_init(&pmu->lock); > hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); > pmu->timer.function = i915_sample;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index a8eac59e3779..1c4784cb296c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -15,6 +15,7 @@ #define HEAD_WRAP_COUNT 0xFFE00000 #define HEAD_WRAP_ONE 0x00200000 #define HEAD_ADDR 0x001FFFFC +#define HEAD_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */ #define RING_START(base) _MMIO((base) + 0x38) #define RING_CTL(base) _MMIO((base) + 0x3c) #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ @@ -26,7 +27,6 @@ #define RING_VALID_MASK 0x00000001 #define RING_VALID 0x00000001 #define RING_INVALID 0x00000000 -#define RING_WAIT_I8XX (1 << 0) /* gen2, PRBx_HEAD */ #define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */ #define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */ #define RING_SYNC_0(base) _MMIO((base) + 0x40) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 67b6cbdeff1d..2eed4c866321 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -356,7 +356,7 @@ static bool exclusive_mmio_access(const struct drm_i915_private *i915) return GRAPHICS_VER(i915) == 7; } -static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns) +static void engine_sample_gen3(struct intel_engine_cs *engine, unsigned int period_ns) { struct intel_engine_pmu *pmu = &engine->pmu; bool busy; @@ -391,6 +391,31 @@ static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); } +static void engine_sample_gen2(struct intel_engine_cs *engine, unsigned int period_ns) +{ + struct intel_engine_pmu *pmu = &engine->pmu; + u32 tail, head, acthd; + + tail = ENGINE_READ_FW(engine, RING_TAIL); + head = ENGINE_READ_FW(engine, RING_HEAD); + acthd = ENGINE_READ_FW(engine, ACTHD); + + if (head & HEAD_WAIT_I8XX) + add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); + + if (head & HEAD_WAIT_I8XX || head != acthd || + (head & HEAD_ADDR) != (tail & TAIL_ADDR)) + add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); +} + +static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns) +{ + if (GRAPHICS_VER(engine->i915) >= 3) + engine_sample_gen3(engine, period_ns); + else + engine_sample_gen2(engine, period_ns); +} + static void engines_sample(struct intel_gt *gt, unsigned int period_ns) { @@ -1243,11 +1268,6 @@ void i915_pmu_register(struct drm_i915_private *i915) int ret = -ENOMEM; - if (GRAPHICS_VER(i915) <= 2) { - drm_info(&i915->drm, "PMU not supported for this GPU."); - return; - } - spin_lock_init(&pmu->lock); hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample;