Message ID | 20171026140144.12267-1-mika.kuoppala@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Quoting Mika Kuoppala (2017-10-26 15:01:44) > There is a possibility on gen9 hardware to miss the forcewake ack > message. The recommended workaround is to use another free > bit and toggle it until original bit is successfully acknowledged. > > The workaround is a bit misleadingly named as WaRsForcewakeAddDelayForAck. > The reason for naming is most likely that workaround was named before > the most recent recommendation evolved to use the reserve bit. > > Some future gen9 revs might or might not fix the underlying issue but > the fallback to reserve bit dance can be considered as harmless: > without the ack timeout we never reach the reserve bit forcewake. > Thus as of now we adopt a blanket approach for all gen9 and leave > the bypassing the reserve bit approach for future patches if > corresponding hw revisions do appear. > > Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms > for forcewake request/clear ack") did increase the forcewake timeout. > If the issue was a delayed ack, future work could include finding > a suitable timeout value both for primary ack and reserve toggle > to reduce the worst case latency. > > References: HSDES #1604254524 > References: https://bugs.freedesktop.org/show_bug.cgi?id=102051 > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com> > Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > --- > drivers/gpu/drm/i915/i915_reg.h | 5 +- > drivers/gpu/drm/i915/intel_uncore.c | 112 +++++++++++++++++++++++++++++++++--- > 2 files changed, 108 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index f138eae82bf0..c04c634af5ae 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -7774,8 +7774,9 @@ enum { > #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0x0D88) > #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0x0D84) > #define FORCEWAKE_ACK_BLITTER_GEN9 _MMIO(0x130044) > -#define FORCEWAKE_KERNEL 0x1 > -#define FORCEWAKE_USER 0x2 > +#define FORCEWAKE_KERNEL BIT(0) > +#define FORCEWAKE_USER BIT(1) > +#define FORCEWAKE_RESERVE BIT(12) Why 12? How about 15? FORCEWAKE_KERNEL2 or FORCEWAKE_KERNEL_FALLBACK Reserved tends to imply future hw bits. So I think s/reserve/fallback/ works throughout the patch. > #define FORCEWAKE_MT_ACK _MMIO(0x130040) > #define ECOBUS _MMIO(0xa180) > #define FORCEWAKE_MT_ENABLE (1<<5) > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c > index 20e3c65c0999..fc6d090244c5 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.c > +++ b/drivers/gpu/drm/i915/intel_uncore.c > @@ -69,17 +69,80 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d) > HRTIMER_MODE_REL); > } > > +static inline bool > +wait_ack_clear(const struct drm_i915_private *i915, > + const struct intel_uncore_forcewake_domain *d, > + const u32 ack) > +{ > + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == 0, > + FORCEWAKE_ACK_TIMEOUT_MS); > +} > + > +static inline bool > +wait_ack_set(const struct drm_i915_private *i915, > + const struct intel_uncore_forcewake_domain *d, > + const u32 ack) > +{ > + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack), > + FORCEWAKE_ACK_TIMEOUT_MS); > +} Let's make these one function, to cut down on the wait_for() expansions. Keeping the wait_ack_clear/wait_ack_set wrappers. static inline int __wait_for_ack(i915, d, ack, value) { return wait_for_atomic(((__raw_i915_read32(i915, d->reg_ack) & ack)) == value, FORCEWAKE_ACK_TIMEOUT_MS); } static inline int wait_ack_clear() { return __wait_for_ack(i915, d, ack, 0)); static inline int wait_ack_set() { return __wait_for_ack(i915, d, ack, ack)); > static inline void > fw_domain_wait_ack_clear(const struct drm_i915_private *i915, > const struct intel_uncore_forcewake_domain *d) > { > - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & > - FORCEWAKE_KERNEL) == 0, > - FORCEWAKE_ACK_TIMEOUT_MS)) > + if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL)) > DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n", > intel_uncore_forcewake_domain_to_str(d->id)); > } > > +enum ack_type { > + ACK_CLEAR = 0, > + ACK_SET > +}; > + > +static bool > +fw_domain_reserve_fallback(const struct drm_i915_private *i915, > + const struct intel_uncore_forcewake_domain *d, > + const enum ack_type type) > +{ > + bool timeout; > + int retry = 10; > + > + /* Fallback to toggle another fw bit to wake up the gpu */ The comment needs some work; the first bit itself was meant to wake the gpu, so why is this bit any more likely to work? > + do { > + wait_ack_clear(i915, d, FORCEWAKE_RESERVE); > + __raw_i915_write32(i915, d->reg_set, > + _MASKED_BIT_ENABLE(FORCEWAKE_RESERVE)); > + wait_ack_set(i915, d, FORCEWAKE_RESERVE); > + > + if (type == ACK_CLEAR) > + timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL); > + else > + timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL); > + > + __raw_i915_write32(i915, d->reg_set, > + _MASKED_BIT_DISABLE(FORCEWAKE_RESERVE)); Blindly toggling another bit to encourage the first to succeed. Why stop at 1 extra bit???? If the wait_ack_set(RSVD) fails, you might as well abort? Other than my face hitting the desk, the logic makes sense to me: Assert second fw bit; check original bit; clear second fw bit (so we don't keep fw longer than required). This basically also has the side-effect of making the timeout 12x longer. > + } while (timeout && --retry); > + > + return timeout; > +} > + > +static inline void > +fw_domain_wait_ack_clear_reserve(const struct drm_i915_private *i915, > + const struct intel_uncore_forcewake_domain *d) > +{ > + bool timeout; bool err; or none at all. timeout tends to be a value we pass around telling us how long until the timeout; not the status, which would be timedout. > + > + timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL); > + if (likely(!timeout)) > + return; > + > + timeout = fw_domain_reserve_fallback(i915, d, ACK_CLEAR); > + if (timeout) > + fw_domain_wait_ack_clear(i915, d); Ok, one last try and raise an error if it fails. > +} > + > static inline void > fw_domain_get(struct drm_i915_private *i915, > const struct intel_uncore_forcewake_domain *d) > @@ -91,14 +154,27 @@ static inline void > fw_domain_wait_ack(const struct drm_i915_private *i915, > const struct intel_uncore_forcewake_domain *d) > { > - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & > - FORCEWAKE_KERNEL), > - FORCEWAKE_ACK_TIMEOUT_MS)) > + if (wait_ack_set(i915, d, FORCEWAKE_KERNEL)) > DRM_ERROR("%s: timed out waiting for forcewake ack request.\n", > intel_uncore_forcewake_domain_to_str(d->id)); > } > > static inline void > +fw_domain_wait_ack_set_reserve(const struct drm_i915_private *i915, > + const struct intel_uncore_forcewake_domain *d) > +{ > + bool timeout; > + > + timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL); > + if (likely(!timeout)) > + return; > + > + timeout = fw_domain_reserve_fallback(i915, d, ACK_SET); > + if (timeout) > + fw_domain_wait_ack(i915, d); > +} > + > +static inline void > fw_domain_put(const struct drm_i915_private *i915, > const struct intel_uncore_forcewake_domain *d) > { > @@ -125,6 +201,26 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains) > } > > static void > +fw_domains_get_with_reserve(struct drm_i915_private *i915, > + enum forcewake_domains fw_domains) > +{ > + struct intel_uncore_forcewake_domain *d; > + unsigned int tmp; > + > + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); > + > + for_each_fw_domain_masked(d, fw_domains, i915, tmp) { > + fw_domain_wait_ack_clear_reserve(i915, d); > + fw_domain_get(i915, d); > + } > + > + for_each_fw_domain_masked(d, fw_domains, i915, tmp) > + fw_domain_wait_ack_set_reserve(i915, d); > + > + i915->uncore.fw_domains_active |= fw_domains; Ok. > +} > + > +static void > fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains) > { > struct intel_uncore_forcewake_domain *d; > @@ -1142,7 +1238,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) > } > > if (INTEL_GEN(dev_priv) >= 9) { > - dev_priv->uncore.funcs.force_wake_get = fw_domains_get; > + /* WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl */ > + dev_priv->uncore.funcs.force_wake_get = > + fw_domains_get_with_reserve; > dev_priv->uncore.funcs.force_wake_put = fw_domains_put; > fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, > FORCEWAKE_RENDER_GEN9, > -- > 2.11.0 >
Chris Wilson <chris@chris-wilson.co.uk> writes: > Quoting Mika Kuoppala (2017-10-26 15:01:44) >> There is a possibility on gen9 hardware to miss the forcewake ack >> message. The recommended workaround is to use another free >> bit and toggle it until original bit is successfully acknowledged. >> >> The workaround is a bit misleadingly named as WaRsForcewakeAddDelayForAck. >> The reason for naming is most likely that workaround was named before >> the most recent recommendation evolved to use the reserve bit. >> >> Some future gen9 revs might or might not fix the underlying issue but >> the fallback to reserve bit dance can be considered as harmless: >> without the ack timeout we never reach the reserve bit forcewake. >> Thus as of now we adopt a blanket approach for all gen9 and leave >> the bypassing the reserve bit approach for future patches if >> corresponding hw revisions do appear. >> >> Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms >> for forcewake request/clear ack") did increase the forcewake timeout. >> If the issue was a delayed ack, future work could include finding >> a suitable timeout value both for primary ack and reserve toggle >> to reduce the worst case latency. >> >> References: HSDES #1604254524 >> References: https://bugs.freedesktop.org/show_bug.cgi?id=102051 >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> >> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> >> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com> >> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> >> --- >> drivers/gpu/drm/i915/i915_reg.h | 5 +- >> drivers/gpu/drm/i915/intel_uncore.c | 112 +++++++++++++++++++++++++++++++++--- >> 2 files changed, 108 insertions(+), 9 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h >> index f138eae82bf0..c04c634af5ae 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -7774,8 +7774,9 @@ enum { >> #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0x0D88) >> #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0x0D84) >> #define FORCEWAKE_ACK_BLITTER_GEN9 _MMIO(0x130044) >> -#define FORCEWAKE_KERNEL 0x1 >> -#define FORCEWAKE_USER 0x2 >> +#define FORCEWAKE_KERNEL BIT(0) >> +#define FORCEWAKE_USER BIT(1) >> +#define FORCEWAKE_RESERVE BIT(12) > > Why 12? How about 15? > I just mimiced the pseudocode from an errata sheet slavishly. AFAIK 15 should work as well as 12 so 15. > FORCEWAKE_KERNEL2 or FORCEWAKE_KERNEL_FALLBACK KERNEL2 would imply similar mechanism. Fallback I like more as this is just under the hood toggling for waking up the real bit. > > Reserved tends to imply future hw bits. So I think s/reserve/fallback/ > works throughout the patch. > Yup, fallback is better. >> #define FORCEWAKE_MT_ACK _MMIO(0x130040) >> #define ECOBUS _MMIO(0xa180) >> #define FORCEWAKE_MT_ENABLE (1<<5) >> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c >> index 20e3c65c0999..fc6d090244c5 100644 >> --- a/drivers/gpu/drm/i915/intel_uncore.c >> +++ b/drivers/gpu/drm/i915/intel_uncore.c >> @@ -69,17 +69,80 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d) >> HRTIMER_MODE_REL); >> } >> >> +static inline bool >> +wait_ack_clear(const struct drm_i915_private *i915, >> + const struct intel_uncore_forcewake_domain *d, >> + const u32 ack) >> +{ >> + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == 0, >> + FORCEWAKE_ACK_TIMEOUT_MS); >> +} >> + >> +static inline bool >> +wait_ack_set(const struct drm_i915_private *i915, >> + const struct intel_uncore_forcewake_domain *d, >> + const u32 ack) >> +{ >> + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack), >> + FORCEWAKE_ACK_TIMEOUT_MS); >> +} > > Let's make these one function, to cut down on the wait_for() expansions. > > Keeping the wait_ack_clear/wait_ack_set wrappers. > > static inline int > __wait_for_ack(i915, d, ack, value) > { > return wait_for_atomic(((__raw_i915_read32(i915, d->reg_ack) & ack)) == value, > FORCEWAKE_ACK_TIMEOUT_MS); > } > > static inline int wait_ack_clear() { return __wait_for_ack(i915, d, ack, 0)); > static inline int wait_ack_set() { return __wait_for_ack(i915, d, ack, ack)); > >> static inline void >> fw_domain_wait_ack_clear(const struct drm_i915_private *i915, >> const struct intel_uncore_forcewake_domain *d) >> { >> - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & >> - FORCEWAKE_KERNEL) == 0, >> - FORCEWAKE_ACK_TIMEOUT_MS)) >> + if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL)) >> DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n", >> intel_uncore_forcewake_domain_to_str(d->id)); >> } >> >> +enum ack_type { >> + ACK_CLEAR = 0, >> + ACK_SET >> +}; >> + >> +static bool >> +fw_domain_reserve_fallback(const struct drm_i915_private *i915, >> + const struct intel_uncore_forcewake_domain *d, >> + const enum ack_type type) >> +{ >> + bool timeout; >> + int retry = 10; >> + >> + /* Fallback to toggle another fw bit to wake up the gpu */ > > The comment needs some work; the first bit itself was meant to wake the > gpu, so why is this bit any more likely to work? > Yeah, this needs rework. It is more like toggling an unrelated bit will make the 'lost or pending' ack message for the KERNEL bit to materialize. I thought that should we also retoggle the KERNEL one but again, this patch mimics the pseudocode I found 1:1. There are hints that both delaying wait and a regtogging of the original bit was used at some point of time as a workaround. This is _supposed_ to be the latest recommendation to combat the issue. Efforts and success to reproduce would give us venue to measure if it is the best known. >> + do { >> + wait_ack_clear(i915, d, FORCEWAKE_RESERVE); >> + __raw_i915_write32(i915, d->reg_set, >> + _MASKED_BIT_ENABLE(FORCEWAKE_RESERVE)); >> + wait_ack_set(i915, d, FORCEWAKE_RESERVE); >> + >> + if (type == ACK_CLEAR) >> + timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL); >> + else >> + timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL); >> + >> + __raw_i915_write32(i915, d->reg_set, >> + _MASKED_BIT_DISABLE(FORCEWAKE_RESERVE)); > > Blindly toggling another bit to encourage the first to succeed. > > Why stop at 1 extra bit???? Again 1:1 mimic of pseudocode. What we could do instead of this toggle hammering, is to go through all unreserved bits until one ack appears on any and be happy. > > If the wait_ack_set(RSVD) fails, you might as well abort? > For me it looks like this toggle hammering will kick the state machine to spit out the first ack message that was somehow lost. I did again follow the recomennded procedure precisely. > Other than my face hitting the desk, the logic makes sense to me: > > Assert second fw bit; check original bit; clear second fw bit (so we > don't keep fw longer than required). > > This basically also has the side-effect of making the timeout 12x > longer. > In the worst case yes. But the first toggle should already untangle the mess. Probability that we end up retrying all the way through is very unlikely. Now that I have said it, it will happen :) Thank you for the comments. -Mika >> + } while (timeout && --retry); >> + >> + return timeout; >> +} >> + >> +static inline void >> +fw_domain_wait_ack_clear_reserve(const struct drm_i915_private *i915, >> + const struct intel_uncore_forcewake_domain *d) >> +{ >> + bool timeout; > bool err; or none at all. > > timeout tends to be a value we pass around telling us how long until the > timeout; not the status, which would be timedout. > >> + >> + timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL); >> + if (likely(!timeout)) >> + return; >> + >> + timeout = fw_domain_reserve_fallback(i915, d, ACK_CLEAR); >> + if (timeout) >> + fw_domain_wait_ack_clear(i915, d); > > Ok, one last try and raise an error if it fails. > >> +} >> + >> static inline void >> fw_domain_get(struct drm_i915_private *i915, >> const struct intel_uncore_forcewake_domain *d) >> @@ -91,14 +154,27 @@ static inline void >> fw_domain_wait_ack(const struct drm_i915_private *i915, >> const struct intel_uncore_forcewake_domain *d) >> { >> - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & >> - FORCEWAKE_KERNEL), >> - FORCEWAKE_ACK_TIMEOUT_MS)) >> + if (wait_ack_set(i915, d, FORCEWAKE_KERNEL)) >> DRM_ERROR("%s: timed out waiting for forcewake ack request.\n", >> intel_uncore_forcewake_domain_to_str(d->id)); >> } >> >> static inline void >> +fw_domain_wait_ack_set_reserve(const struct drm_i915_private *i915, >> + const struct intel_uncore_forcewake_domain *d) >> +{ >> + bool timeout; >> + >> + timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL); >> + if (likely(!timeout)) >> + return; >> + >> + timeout = fw_domain_reserve_fallback(i915, d, ACK_SET); >> + if (timeout) >> + fw_domain_wait_ack(i915, d); >> +} >> + >> +static inline void >> fw_domain_put(const struct drm_i915_private *i915, >> const struct intel_uncore_forcewake_domain *d) >> { >> @@ -125,6 +201,26 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains) >> } >> >> static void >> +fw_domains_get_with_reserve(struct drm_i915_private *i915, >> + enum forcewake_domains fw_domains) >> +{ >> + struct intel_uncore_forcewake_domain *d; >> + unsigned int tmp; >> + >> + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); >> + >> + for_each_fw_domain_masked(d, fw_domains, i915, tmp) { >> + fw_domain_wait_ack_clear_reserve(i915, d); >> + fw_domain_get(i915, d); >> + } >> + >> + for_each_fw_domain_masked(d, fw_domains, i915, tmp) >> + fw_domain_wait_ack_set_reserve(i915, d); >> + >> + i915->uncore.fw_domains_active |= fw_domains; > > Ok. > >> +} >> + >> +static void >> fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains) >> { >> struct intel_uncore_forcewake_domain *d; >> @@ -1142,7 +1238,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) >> } >> >> if (INTEL_GEN(dev_priv) >= 9) { >> - dev_priv->uncore.funcs.force_wake_get = fw_domains_get; >> + /* WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl */ >> + dev_priv->uncore.funcs.force_wake_get = >> + fw_domains_get_with_reserve; >> dev_priv->uncore.funcs.force_wake_put = fw_domains_put; >> fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, >> FORCEWAKE_RENDER_GEN9, >> -- >> 2.11.0 >>
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f138eae82bf0..c04c634af5ae 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7774,8 +7774,9 @@ enum { #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0x0D88) #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0x0D84) #define FORCEWAKE_ACK_BLITTER_GEN9 _MMIO(0x130044) -#define FORCEWAKE_KERNEL 0x1 -#define FORCEWAKE_USER 0x2 +#define FORCEWAKE_KERNEL BIT(0) +#define FORCEWAKE_USER BIT(1) +#define FORCEWAKE_RESERVE BIT(12) #define FORCEWAKE_MT_ACK _MMIO(0x130040) #define ECOBUS _MMIO(0xa180) #define FORCEWAKE_MT_ENABLE (1<<5) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 20e3c65c0999..fc6d090244c5 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -69,17 +69,80 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d) HRTIMER_MODE_REL); } +static inline bool +wait_ack_clear(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d, + const u32 ack) +{ + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == 0, + FORCEWAKE_ACK_TIMEOUT_MS); +} + +static inline bool +wait_ack_set(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d, + const u32 ack) +{ + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack), + FORCEWAKE_ACK_TIMEOUT_MS); +} + static inline void fw_domain_wait_ack_clear(const struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) { - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & - FORCEWAKE_KERNEL) == 0, - FORCEWAKE_ACK_TIMEOUT_MS)) + if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL)) DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n", intel_uncore_forcewake_domain_to_str(d->id)); } +enum ack_type { + ACK_CLEAR = 0, + ACK_SET +}; + +static bool +fw_domain_reserve_fallback(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d, + const enum ack_type type) +{ + bool timeout; + int retry = 10; + + /* Fallback to toggle another fw bit to wake up the gpu */ + do { + wait_ack_clear(i915, d, FORCEWAKE_RESERVE); + __raw_i915_write32(i915, d->reg_set, + _MASKED_BIT_ENABLE(FORCEWAKE_RESERVE)); + wait_ack_set(i915, d, FORCEWAKE_RESERVE); + + if (type == ACK_CLEAR) + timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL); + else + timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL); + + __raw_i915_write32(i915, d->reg_set, + _MASKED_BIT_DISABLE(FORCEWAKE_RESERVE)); + } while (timeout && --retry); + + return timeout; +} + +static inline void +fw_domain_wait_ack_clear_reserve(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) +{ + bool timeout; + + timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL); + if (likely(!timeout)) + return; + + timeout = fw_domain_reserve_fallback(i915, d, ACK_CLEAR); + if (timeout) + fw_domain_wait_ack_clear(i915, d); +} + static inline void fw_domain_get(struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) @@ -91,14 +154,27 @@ static inline void fw_domain_wait_ack(const struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) { - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & - FORCEWAKE_KERNEL), - FORCEWAKE_ACK_TIMEOUT_MS)) + if (wait_ack_set(i915, d, FORCEWAKE_KERNEL)) DRM_ERROR("%s: timed out waiting for forcewake ack request.\n", intel_uncore_forcewake_domain_to_str(d->id)); } static inline void +fw_domain_wait_ack_set_reserve(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) +{ + bool timeout; + + timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL); + if (likely(!timeout)) + return; + + timeout = fw_domain_reserve_fallback(i915, d, ACK_SET); + if (timeout) + fw_domain_wait_ack(i915, d); +} + +static inline void fw_domain_put(const struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) { @@ -125,6 +201,26 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains) } static void +fw_domains_get_with_reserve(struct drm_i915_private *i915, + enum forcewake_domains fw_domains) +{ + struct intel_uncore_forcewake_domain *d; + unsigned int tmp; + + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); + + for_each_fw_domain_masked(d, fw_domains, i915, tmp) { + fw_domain_wait_ack_clear_reserve(i915, d); + fw_domain_get(i915, d); + } + + for_each_fw_domain_masked(d, fw_domains, i915, tmp) + fw_domain_wait_ack_set_reserve(i915, d); + + i915->uncore.fw_domains_active |= fw_domains; +} + +static void fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *d; @@ -1142,7 +1238,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) } if (INTEL_GEN(dev_priv) >= 9) { - dev_priv->uncore.funcs.force_wake_get = fw_domains_get; + /* WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl */ + dev_priv->uncore.funcs.force_wake_get = + fw_domains_get_with_reserve; dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER_GEN9,
There is a possibility on gen9 hardware to miss the forcewake ack message. The recommended workaround is to use another free bit and toggle it until original bit is successfully acknowledged. The workaround is a bit misleadingly named as WaRsForcewakeAddDelayForAck. The reason for naming is most likely that workaround was named before the most recent recommendation evolved to use the reserve bit. Some future gen9 revs might or might not fix the underlying issue but the fallback to reserve bit dance can be considered as harmless: without the ack timeout we never reach the reserve bit forcewake. Thus as of now we adopt a blanket approach for all gen9 and leave the bypassing the reserve bit approach for future patches if corresponding hw revisions do appear. Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms for forcewake request/clear ack") did increase the forcewake timeout. If the issue was a delayed ack, future work could include finding a suitable timeout value both for primary ack and reserve toggle to reduce the worst case latency. References: HSDES #1604254524 References: https://bugs.freedesktop.org/show_bug.cgi?id=102051 Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> --- drivers/gpu/drm/i915/i915_reg.h | 5 +- drivers/gpu/drm/i915/intel_uncore.c | 112 +++++++++++++++++++++++++++++++++--- 2 files changed, 108 insertions(+), 9 deletions(-)