Message ID | 20220415224025.3693037-3-umesh.nerlige.ramappa@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Enable WAs related to DG2 | expand |
On 4/15/2022 3:40 PM, Umesh Nerlige Ramappa wrote: > Initiating a reset when the command streamer is not idle or in the > middle of executing an MI_FORCE_WAKE can result in a hang. Multiple > command streamers can be part of a single reset domain, so resetting one > would mean resetting all command streamers in that domain. > > To workaround this, before initiating a reset, ensure that all command > streamers within that reset domain are either IDLE or are not executing > a MI_FORCE_WAKE. > > Enable GuC PRE_PARSER WA bit so that GuC follows the WA sequence when > initiating engine-resets. > > For gt-resets, ensure that i915 applies the WA sequence. > > Opens to address in future patches: > - The part of the WA to wait for pending forcewakes is also applicable > to execlists backend. > - The WA also needs to be applied for gen11 I would've preferred this patch to already include the gen11 checks, but given that gen11 defaults to execlists submission and that we'd need to specially enable GuC in CI to cover, I can see how making it a follow-up makes things simpler, so not a blocker. 
> > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Daniele > --- > drivers/gpu/drm/i915/gt/intel_gt_pm.c | 9 +- > drivers/gpu/drm/i915/gt/intel_gt_regs.h | 18 ++++ > drivers/gpu/drm/i915/gt/intel_reset.c | 5 +- > drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 + > drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 3 +- > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 85 ++++++++++++++++++- > 6 files changed, 116 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c > index eeead40485fb..f553e2173bda 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c > @@ -182,15 +182,16 @@ static void gt_sanitize(struct intel_gt *gt, bool force) > if (intel_gt_is_wedged(gt)) > intel_gt_unset_wedged(gt); > > - for_each_engine(engine, gt, id) > + /* For GuC mode, ensure submission is disabled before stopping ring */ > + intel_uc_reset_prepare(>->uc); > + > + for_each_engine(engine, gt, id) { > if (engine->reset.prepare) > engine->reset.prepare(engine); > > - intel_uc_reset_prepare(>->uc); > - > - for_each_engine(engine, gt, id) > if (engine->sanitize) > engine->sanitize(engine); > + } > > if (reset_engines(gt) || force) { > for_each_engine(engine, gt, id) > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h > index 0a5c2648aaf0..12d892851684 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h > @@ -841,6 +841,24 @@ > #define CTC_SHIFT_PARAMETER_SHIFT 1 > #define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT) > > +/* GPM MSG_IDLE */ > +#define MSG_IDLE_CS _MMIO(0x8000) > +#define MSG_IDLE_VCS0 _MMIO(0x8004) > +#define MSG_IDLE_VCS1 _MMIO(0x8008) > +#define MSG_IDLE_BCS _MMIO(0x800C) > +#define MSG_IDLE_VECS0 _MMIO(0x8010) > +#define MSG_IDLE_VCS2 _MMIO(0x80C0) > +#define 
MSG_IDLE_VCS3 _MMIO(0x80C4) > +#define MSG_IDLE_VCS4 _MMIO(0x80C8) > +#define MSG_IDLE_VCS5 _MMIO(0x80CC) > +#define MSG_IDLE_VCS6 _MMIO(0x80D0) > +#define MSG_IDLE_VCS7 _MMIO(0x80D4) > +#define MSG_IDLE_VECS1 _MMIO(0x80D8) > +#define MSG_IDLE_VECS2 _MMIO(0x80DC) > +#define MSG_IDLE_VECS3 _MMIO(0x80E0) > +#define MSG_IDLE_FW_MASK REG_GENMASK(13, 9) > +#define MSG_IDLE_FW_SHIFT 9 > + > #define FORCEWAKE_MEDIA_GEN9 _MMIO(0xa270) > #define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278) > > diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c > index f52015e79fdf..5422a3b84bd4 100644 > --- a/drivers/gpu/drm/i915/gt/intel_reset.c > +++ b/drivers/gpu/drm/i915/gt/intel_reset.c > @@ -772,14 +772,15 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt) > intel_engine_mask_t awake = 0; > enum intel_engine_id id; > > + /* For GuC mode, ensure submission is disabled before stopping ring */ > + intel_uc_reset_prepare(>->uc); > + > for_each_engine(engine, gt, id) { > if (intel_engine_pm_get_if_awake(engine)) > awake |= engine->mask; > reset_prepare_engine(engine); > } > > - intel_uc_reset_prepare(>->uc); > - > return awake; > } > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c > index cda7e4bb8bac..185fb4d59791 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c > @@ -292,6 +292,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) > GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50)) > flags |= GUC_WA_POLLCS; > > + /* Wa_22011802037: graphics version 12 */ > + if (GRAPHICS_VER(gt->i915) == 12) > + flags |= GUC_WA_PRE_PARSER; > + > return flags; > } > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > index c154b5efccde..b136d6528fbf 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > @@ -98,7 +98,8 @@ > #define GUC_LOG_BUF_ADDR_SHIFT 
12 > > #define GUC_CTL_WA 1 > -#define GUC_WA_POLLCS BIT(18) > +#define GUC_WA_PRE_PARSER BIT(14) > +#define GUC_WA_POLLCS BIT(18) > > #define GUC_CTL_FEATURE 2 > #define GUC_CTL_ENABLE_SLPC BIT(2) > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index ee45fdb67f32..172819cd1a0a 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -1540,6 +1540,89 @@ static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) > lrc_update_regs(ce, engine, head); > } > > +static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine) > +{ > + static const i915_reg_t _reg[I915_NUM_ENGINES] = { > + [RCS0] = MSG_IDLE_CS, > + [BCS0] = MSG_IDLE_BCS, > + [VCS0] = MSG_IDLE_VCS0, > + [VCS1] = MSG_IDLE_VCS1, > + [VCS2] = MSG_IDLE_VCS2, > + [VCS3] = MSG_IDLE_VCS3, > + [VCS4] = MSG_IDLE_VCS4, > + [VCS5] = MSG_IDLE_VCS5, > + [VCS6] = MSG_IDLE_VCS6, > + [VCS7] = MSG_IDLE_VCS7, > + [VECS0] = MSG_IDLE_VECS0, > + [VECS1] = MSG_IDLE_VECS1, > + [VECS2] = MSG_IDLE_VECS2, > + [VECS3] = MSG_IDLE_VECS3, > + [CCS0] = MSG_IDLE_CS, > + [CCS1] = MSG_IDLE_CS, > + [CCS2] = MSG_IDLE_CS, > + [CCS3] = MSG_IDLE_CS, > + }; > + u32 val; > + > + if (!_reg[engine->id].reg) > + return 0; > + > + val = intel_uncore_read(engine->uncore, _reg[engine->id]); > + > + /* bits[29:25] & bits[13:9] >> shift */ > + return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT; > +} > + > +static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask) > +{ > + int ret; > + > + /* Ensure GPM receives fw up/down after CS is stopped */ > + udelay(1); > + > + /* Wait for forcewake request to complete in GPM */ > + ret = __intel_wait_for_register_fw(gt->uncore, > + GEN9_PWRGT_DOMAIN_STATUS, > + fw_mask, fw_mask, 5000, 0, NULL); > + > + /* Ensure CS receives fw ack from GPM */ > + udelay(1); > + > + if (ret) > + GT_TRACE(gt, "Failed to complete pending 
forcewake %d\n", ret); > +} > + > +/* > + * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any > + * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The > + * pending status is indicated by bits[13:9] (masked by bits[ 29:25]) in the > + * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we > + * are concerned only with the gt reset here, we use a logical OR of pending > + * forcewakeups from all reset domains and then wait for them to complete by > + * querying PWRGT_DOMAIN_STATUS. > + */ > +static void guc_engine_reset_prepare(struct intel_engine_cs *engine) > +{ > + u32 fw_pending; > + > + if (GRAPHICS_VER(engine->i915) != 12) > + return; > + > + /* > + * Wa_22011802037 > + * TODO: Occassionally trying to stop the cs times out, but does not > + * adversely affect functionality. The timeout is set as a config > + * parameter that defaults to 100ms. Assuming that this timeout is > + * sufficient for any pending MI_FORCEWAKEs to complete, ignore the > + * timeout returned here until it is root caused. > + */ > + intel_engine_stop_cs(engine); > + > + fw_pending = __cs_pending_mi_force_wakes(engine); > + if (fw_pending) > + __gpm_wait_for_fw_complete(engine->gt, fw_pending); > +} > + > static void guc_reset_nop(struct intel_engine_cs *engine) > { > } > @@ -3795,7 +3878,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) > > engine->sched_engine->schedule = i915_schedule; > > - engine->reset.prepare = guc_reset_nop; > + engine->reset.prepare = guc_engine_reset_prepare; > engine->reset.rewind = guc_rewind_nop; > engine->reset.cancel = guc_reset_nop; > engine->reset.finish = guc_reset_nop;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index eeead40485fb..f553e2173bda 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -182,15 +182,16 @@ static void gt_sanitize(struct intel_gt *gt, bool force) if (intel_gt_is_wedged(gt)) intel_gt_unset_wedged(gt); - for_each_engine(engine, gt, id) + /* For GuC mode, ensure submission is disabled before stopping ring */ + intel_uc_reset_prepare(>->uc); + + for_each_engine(engine, gt, id) { if (engine->reset.prepare) engine->reset.prepare(engine); - intel_uc_reset_prepare(>->uc); - - for_each_engine(engine, gt, id) if (engine->sanitize) engine->sanitize(engine); + } if (reset_engines(gt) || force) { for_each_engine(engine, gt, id) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 0a5c2648aaf0..12d892851684 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -841,6 +841,24 @@ #define CTC_SHIFT_PARAMETER_SHIFT 1 #define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT) +/* GPM MSG_IDLE */ +#define MSG_IDLE_CS _MMIO(0x8000) +#define MSG_IDLE_VCS0 _MMIO(0x8004) +#define MSG_IDLE_VCS1 _MMIO(0x8008) +#define MSG_IDLE_BCS _MMIO(0x800C) +#define MSG_IDLE_VECS0 _MMIO(0x8010) +#define MSG_IDLE_VCS2 _MMIO(0x80C0) +#define MSG_IDLE_VCS3 _MMIO(0x80C4) +#define MSG_IDLE_VCS4 _MMIO(0x80C8) +#define MSG_IDLE_VCS5 _MMIO(0x80CC) +#define MSG_IDLE_VCS6 _MMIO(0x80D0) +#define MSG_IDLE_VCS7 _MMIO(0x80D4) +#define MSG_IDLE_VECS1 _MMIO(0x80D8) +#define MSG_IDLE_VECS2 _MMIO(0x80DC) +#define MSG_IDLE_VECS3 _MMIO(0x80E0) +#define MSG_IDLE_FW_MASK REG_GENMASK(13, 9) +#define MSG_IDLE_FW_SHIFT 9 + #define FORCEWAKE_MEDIA_GEN9 _MMIO(0xa270) #define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index f52015e79fdf..5422a3b84bd4 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -772,14 +772,15 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt) intel_engine_mask_t awake = 0; enum intel_engine_id id; + /* For GuC mode, ensure submission is disabled before stopping ring */ + intel_uc_reset_prepare(>->uc); + for_each_engine(engine, gt, id) { if (intel_engine_pm_get_if_awake(engine)) awake |= engine->mask; reset_prepare_engine(engine); } - intel_uc_reset_prepare(>->uc); - return awake; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index cda7e4bb8bac..185fb4d59791 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -292,6 +292,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50)) flags |= GUC_WA_POLLCS; + /* Wa_22011802037: graphics version 12 */ + if (GRAPHICS_VER(gt->i915) == 12) + flags |= GUC_WA_PRE_PARSER; + return flags; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index c154b5efccde..b136d6528fbf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -98,7 +98,8 @@ #define GUC_LOG_BUF_ADDR_SHIFT 12 #define GUC_CTL_WA 1 -#define GUC_WA_POLLCS BIT(18) +#define GUC_WA_PRE_PARSER BIT(14) +#define GUC_WA_POLLCS BIT(18) #define GUC_CTL_FEATURE 2 #define GUC_CTL_ENABLE_SLPC BIT(2) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index ee45fdb67f32..172819cd1a0a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1540,6 +1540,89 @@ static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) lrc_update_regs(ce, engine, head); } +static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine) +{ + static const i915_reg_t 
_reg[I915_NUM_ENGINES] = { + [RCS0] = MSG_IDLE_CS, + [BCS0] = MSG_IDLE_BCS, + [VCS0] = MSG_IDLE_VCS0, + [VCS1] = MSG_IDLE_VCS1, + [VCS2] = MSG_IDLE_VCS2, + [VCS3] = MSG_IDLE_VCS3, + [VCS4] = MSG_IDLE_VCS4, + [VCS5] = MSG_IDLE_VCS5, + [VCS6] = MSG_IDLE_VCS6, + [VCS7] = MSG_IDLE_VCS7, + [VECS0] = MSG_IDLE_VECS0, + [VECS1] = MSG_IDLE_VECS1, + [VECS2] = MSG_IDLE_VECS2, + [VECS3] = MSG_IDLE_VECS3, + [CCS0] = MSG_IDLE_CS, + [CCS1] = MSG_IDLE_CS, + [CCS2] = MSG_IDLE_CS, + [CCS3] = MSG_IDLE_CS, + }; + u32 val; + + if (!_reg[engine->id].reg) + return 0; + + val = intel_uncore_read(engine->uncore, _reg[engine->id]); + + /* bits[29:25] & bits[13:9] >> shift */ + return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT; +} + +static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask) +{ + int ret; + + /* Ensure GPM receives fw up/down after CS is stopped */ + udelay(1); + + /* Wait for forcewake request to complete in GPM */ + ret = __intel_wait_for_register_fw(gt->uncore, + GEN9_PWRGT_DOMAIN_STATUS, + fw_mask, fw_mask, 5000, 0, NULL); + + /* Ensure CS receives fw ack from GPM */ + udelay(1); + + if (ret) + GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret); +} + +/* + * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any + * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The + * pending status is indicated by bits[13:9] (masked by bits[ 29:25]) in the + * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we + * are concerned only with the gt reset here, we use a logical OR of pending + * forcewakeups from all reset domains and then wait for them to complete by + * querying PWRGT_DOMAIN_STATUS. + */ +static void guc_engine_reset_prepare(struct intel_engine_cs *engine) +{ + u32 fw_pending; + + if (GRAPHICS_VER(engine->i915) != 12) + return; + + /* + * Wa_22011802037 + * TODO: Occassionally trying to stop the cs times out, but does not + * adversely affect functionality. 
The timeout is set as a config + * parameter that defaults to 100ms. Assuming that this timeout is + * sufficient for any pending MI_FORCEWAKEs to complete, ignore the + * timeout returned here until it is root caused. + */ + intel_engine_stop_cs(engine); + + fw_pending = __cs_pending_mi_force_wakes(engine); + if (fw_pending) + __gpm_wait_for_fw_complete(engine->gt, fw_pending); +} + static void guc_reset_nop(struct intel_engine_cs *engine) { } @@ -3795,7 +3878,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->sched_engine->schedule = i915_schedule; - engine->reset.prepare = guc_reset_nop; + engine->reset.prepare = guc_engine_reset_prepare; engine->reset.rewind = guc_rewind_nop; engine->reset.cancel = guc_reset_nop; engine->reset.finish = guc_reset_nop;
Initiating a reset when the command streamer is not idle or in the middle of executing an MI_FORCE_WAKE can result in a hang. Multiple command streamers can be part of a single reset domain, so resetting one would mean resetting all command streamers in that domain. To work around this, before initiating a reset, ensure that all command streamers within that reset domain are either IDLE or are not executing an MI_FORCE_WAKE. Enable GuC PRE_PARSER WA bit so that GuC follows the WA sequence when initiating engine-resets. For gt-resets, ensure that i915 applies the WA sequence. Opens to address in future patches: - The part of the WA to wait for pending forcewakes is also applicable to the execlists backend. - The WA also needs to be applied for gen11. Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 9 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 18 ++++ drivers/gpu/drm/i915/gt/intel_reset.c | 5 +- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 + drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 3 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 85 ++++++++++++++++++- 6 files changed, 116 insertions(+), 8 deletions(-)