Message ID | 20190412153723.31931-3-mika.kuoppala@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/3] drm/i915: Shortcut readiness to reset check | expand |
Quoting Mika Kuoppala (2019-04-12 16:37:23) > Add a log entry to indicate if engine reported an intr > condition that is a sign of halted command streamer. It's a user error. Isn't that we spam the "there has been an error; resetting the gpu" message enough? You could just look at the iir in the error capture for the same effect. > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > --- > drivers/gpu/drm/i915/i915_irq.c | 24 +++++++++++++++++++++--- > drivers/gpu/drm/i915/i915_reg.h | 4 ++++ > 2 files changed, 25 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index d934545445e1..c64ef1291d62 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1480,7 +1480,7 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, > } > > static void > -gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) > +gen8_cs_irq_handler(struct intel_engine_cs *engine, const u32 iir) > { > bool tasklet = false; > > @@ -1496,6 +1496,20 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) > tasklet_hi_schedule(&engine->execlists.tasklet); > } > > +static void > +gen11_cs_irq_handler(struct intel_engine_cs *engine, const u32 iir) > +{ > + const u32 error_intrs = > + GT_LEGACY_CONTEXT_PPGTT_PAGE_FAULT | > + GT_CATASTROPHIC_ERROR_INTERRUPT | > + GT_CS_MASTER_ERROR_INTERRUPT; > + > + if (likely(!(error_intrs & iir))) Likely? > + return gen8_cs_irq_handler(engine, iir); > + > + DRM_ERROR("%s: intr 0x%08x\n", engine->name, iir); > +} > + > static void gen8_gt_irq_ack(struct drm_i915_private *i915, > u32 master_ctl, u32 gt_iir[4]) > { > @@ -3002,7 +3016,7 @@ gen11_engine_irq_handler(struct drm_i915_private * const i915, > engine = NULL; > > if (likely(engine)) > - return gen8_cs_irq_handler(engine, iir); > + return gen11_cs_irq_handler(engine, iir); > > WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n", > class, instance); > @@ -4120,7 +4134,11 @@ static int gen8_irq_postinstall(struct drm_device *dev) > > static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv) > { > - const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT; > + const u32 irqs = GT_CATASTROPHIC_ERROR_INTERRUPT | > + GT_LEGACY_CONTEXT_PPGTT_PAGE_FAULT | > + GT_CS_MASTER_ERROR_INTERRUPT | > + GT_RENDER_USER_INTERRUPT | > + GT_CONTEXT_SWITCH_INTERRUPT; > > BUILD_BUG_ON(irqs & 0xffff0000); > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index c1c0f7ab03e9..fb1cff071f7a 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -2996,6 +2996,10 @@ enum i915_power_well_id { > #define GT_RENDER_DEBUG_INTERRUPT (1 << 1) > #define GT_RENDER_USER_INTERRUPT (1 << 0) > > +#define GT_CATASTROPHIC_ERROR_INTERRUPT (1 << 15) /* gen11 */ > +#define GT_LEGACY_CONTEXT_PPGTT_PAGE_FAULT (1 << 7) /* gen11 */ > +#define GT_CS_MASTER_ERROR_INTERRUPT (1 << 3) /* gen11 */ > + > #define PM_VEBOX_CS_ERROR_INTERRUPT (1 << 12) /* hsw+ */ > #define PM_VEBOX_USER_INTERRUPT (1 << 10) /* hsw+ */ > > -- > 2.17.1 >
Chris Wilson <chris@chris-wilson.co.uk> writes: > Quoting Mika Kuoppala (2019-04-12 16:37:23) >> Add a log entry to indicate if engine reported an intr >> condition that is a sign of halted command streamer. > > It's a user error. Isn't that we spam the "there has been an error; > resetting the gpu" message enough? > > You could just look at the iir in the error capture for the same effect. > Yeah it is debatable if this adds any value at all in this form. Could drop this and introduce only if I get motivation to handle reset faster straight from encountering error intr. >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> >> --- >> drivers/gpu/drm/i915/i915_irq.c | 24 +++++++++++++++++++++--- >> drivers/gpu/drm/i915/i915_reg.h | 4 ++++ >> 2 files changed, 25 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c >> index d934545445e1..c64ef1291d62 100644 >> --- a/drivers/gpu/drm/i915/i915_irq.c >> +++ b/drivers/gpu/drm/i915/i915_irq.c >> @@ -1480,7 +1480,7 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, >> } >> >> static void >> -gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) >> +gen8_cs_irq_handler(struct intel_engine_cs *engine, const u32 iir) >> { >> bool tasklet = false; >> >> @@ -1496,6 +1496,20 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) >> tasklet_hi_schedule(&engine->execlists.tasklet); >> } >> >> +static void >> +gen11_cs_irq_handler(struct intel_engine_cs *engine, const u32 iir) >> +{ >> + const u32 error_intrs = >> + GT_LEGACY_CONTEXT_PPGTT_PAGE_FAULT | >> + GT_CATASTROPHIC_ERROR_INTERRUPT | >> + GT_CS_MASTER_ERROR_INTERRUPT; >> + >> + if (likely(!(error_intrs & iir))) > > Likely? > Likely to not have errors. -Mika >> + return gen8_cs_irq_handler(engine, iir); >> + >> + DRM_ERROR("%s: intr 0x%08x\n", engine->name, iir); >> +} >> + >> static void gen8_gt_irq_ack(struct drm_i915_private *i915, >> u32 master_ctl, u32 gt_iir[4]) >> { >> @@ -3002,7 +3016,7 @@ gen11_engine_irq_handler(struct drm_i915_private * const i915, >> engine = NULL; >> >> if (likely(engine)) >> - return gen8_cs_irq_handler(engine, iir); >> + return gen11_cs_irq_handler(engine, iir); >> >> WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n", >> class, instance); >> @@ -4120,7 +4134,11 @@ static int gen8_irq_postinstall(struct drm_device *dev) >> >> static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv) >> { >> - const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT; >> + const u32 irqs = GT_CATASTROPHIC_ERROR_INTERRUPT | >> + GT_LEGACY_CONTEXT_PPGTT_PAGE_FAULT | >> + GT_CS_MASTER_ERROR_INTERRUPT | >> + GT_RENDER_USER_INTERRUPT | >> + GT_CONTEXT_SWITCH_INTERRUPT; >> >> BUILD_BUG_ON(irqs & 0xffff0000); >> >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h >> index c1c0f7ab03e9..fb1cff071f7a 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -2996,6 +2996,10 @@ enum i915_power_well_id { >> #define GT_RENDER_DEBUG_INTERRUPT (1 << 1) >> #define GT_RENDER_USER_INTERRUPT (1 << 0) >> >> +#define GT_CATASTROPHIC_ERROR_INTERRUPT (1 << 15) /* gen11 */ >> +#define GT_LEGACY_CONTEXT_PPGTT_PAGE_FAULT (1 << 7) /* gen11 */ >> +#define GT_CS_MASTER_ERROR_INTERRUPT (1 << 3) /* gen11 */ >> + >> #define PM_VEBOX_CS_ERROR_INTERRUPT (1 << 12) /* hsw+ */ >> #define PM_VEBOX_USER_INTERRUPT (1 << 10) /* hsw+ */ >> >> -- >> 2.17.1 >>
Quoting Mika Kuoppala (2019-04-12 16:47:11) > Chris Wilson <chris@chris-wilson.co.uk> writes: > > > Quoting Mika Kuoppala (2019-04-12 16:37:23) > >> Add a log entry to indicate if engine reported an intr > >> condition that is a sign of halted command streamer. > > > > It's a user error. Isn't that we spam the "there has been an error; > > resetting the gpu" message enough? > > > > You could just look at the iir in the error capture for the same effect. > > > > Yeah it is debatable if this adds any value at all in this form. > Could drop this and introduce only if I get motivation > to handle reset faster straight from encountering error > intr. > > >> Cc: Chris Wilson <chris@chris-wilson.co.uk> > >> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > >> --- > >> drivers/gpu/drm/i915/i915_irq.c | 24 +++++++++++++++++++++--- > >> drivers/gpu/drm/i915/i915_reg.h | 4 ++++ > >> 2 files changed, 25 insertions(+), 3 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > >> index d934545445e1..c64ef1291d62 100644 > >> --- a/drivers/gpu/drm/i915/i915_irq.c > >> +++ b/drivers/gpu/drm/i915/i915_irq.c > >> @@ -1480,7 +1480,7 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, > >> } > >> > >> static void > >> -gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) > >> +gen8_cs_irq_handler(struct intel_engine_cs *engine, const u32 iir) > >> { > >> bool tasklet = false; > >> > >> @@ -1496,6 +1496,20 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) > >> tasklet_hi_schedule(&engine->execlists.tasklet); > >> } > >> > >> +static void > >> +gen11_cs_irq_handler(struct intel_engine_cs *engine, const u32 iir) > >> +{ > >> + const u32 error_intrs = > >> + GT_LEGACY_CONTEXT_PPGTT_PAGE_FAULT | > >> + GT_CATASTROPHIC_ERROR_INTERRUPT | > >> + GT_CS_MASTER_ERROR_INTERRUPT; > >> + > >> + if (likely(!(error_intrs & iir))) > > > > Likely? > > > > Likely to not have errors. Oh, this is the CS/USER_INTERRUPT handler. Yeah, much more likely. So much more, I'd push more for this to be polled by the error handler :) -Chris
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d934545445e1..c64ef1291d62 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1480,7 +1480,7 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, } static void -gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) +gen8_cs_irq_handler(struct intel_engine_cs *engine, const u32 iir) { bool tasklet = false; @@ -1496,6 +1496,20 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) tasklet_hi_schedule(&engine->execlists.tasklet); } +static void +gen11_cs_irq_handler(struct intel_engine_cs *engine, const u32 iir) +{ + const u32 error_intrs = + GT_LEGACY_CONTEXT_PPGTT_PAGE_FAULT | + GT_CATASTROPHIC_ERROR_INTERRUPT | + GT_CS_MASTER_ERROR_INTERRUPT; + + if (likely(!(error_intrs & iir))) + return gen8_cs_irq_handler(engine, iir); + + DRM_ERROR("%s: intr 0x%08x\n", engine->name, iir); +} + static void gen8_gt_irq_ack(struct drm_i915_private *i915, u32 master_ctl, u32 gt_iir[4]) { @@ -3002,7 +3016,7 @@ gen11_engine_irq_handler(struct drm_i915_private * const i915, engine = NULL; if (likely(engine)) - return gen8_cs_irq_handler(engine, iir); + return gen11_cs_irq_handler(engine, iir); WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n", class, instance); @@ -4120,7 +4134,11 @@ static int gen8_irq_postinstall(struct drm_device *dev) static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv) { - const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT; + const u32 irqs = GT_CATASTROPHIC_ERROR_INTERRUPT | + GT_LEGACY_CONTEXT_PPGTT_PAGE_FAULT | + GT_CS_MASTER_ERROR_INTERRUPT | + GT_RENDER_USER_INTERRUPT | + GT_CONTEXT_SWITCH_INTERRUPT; BUILD_BUG_ON(irqs & 0xffff0000); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c1c0f7ab03e9..fb1cff071f7a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2996,6 +2996,10 @@ enum i915_power_well_id { #define GT_RENDER_DEBUG_INTERRUPT (1 << 1) #define GT_RENDER_USER_INTERRUPT (1 << 0) +#define GT_CATASTROPHIC_ERROR_INTERRUPT (1 << 15) /* gen11 */ +#define GT_LEGACY_CONTEXT_PPGTT_PAGE_FAULT (1 << 7) /* gen11 */ +#define GT_CS_MASTER_ERROR_INTERRUPT (1 << 3) /* gen11 */ + #define PM_VEBOX_CS_ERROR_INTERRUPT (1 << 12) /* hsw+ */ #define PM_VEBOX_USER_INTERRUPT (1 << 10) /* hsw+ */
Add a log entry to indicate if engine reported an intr condition that is a sign of halted command streamer. Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> --- drivers/gpu/drm/i915/i915_irq.c | 24 +++++++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 4 ++++ 2 files changed, 25 insertions(+), 3 deletions(-)