Message ID | 20190729152301.22588-5-michal.wajdeczko@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | add more probe failures | expand |
On 7/29/19 8:23 AM, Michal Wajdeczko wrote: > We shouldn't immediately fail on WOPCM partitioning or programming > as we plan to restore fallback on any GuC related failures. > > While around, add some more probe failure injections. > I was planning to move the uC wopcm programming to intel_uc_init_hw() (where you now have intel_wopcm_is_ready) since we're actually not initializing any wopcm HW but uC HW related to how/where the uC accesses the wopcm. That would allow us to return the failure directly from the function without having to save state. Thoughts? Daniele > Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/gt/uc/intel_uc.c | 3 ++ > drivers/gpu/drm/i915/i915_gem.c | 12 +---- > drivers/gpu/drm/i915/intel_wopcm.c | 66 +++++++++++++++------------ > drivers/gpu/drm/i915/intel_wopcm.h | 11 ++++- > 4 files changed, 50 insertions(+), 42 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c > index 9e1156c29cb1..f096063189d4 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c > @@ -410,6 +410,9 @@ int intel_uc_init_hw(struct intel_uc *uc) > > GEM_BUG_ON(!intel_uc_fw_supported(&guc->fw)); > > + if (!intel_wopcm_is_ready(&i915->wopcm)) > + return -ENXIO; > + > guc_reset_interrupts(guc); > > /* WaEnableuKernelHeaderValidFix:skl */ > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index c437ab5554ec..02e09864856f 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1239,11 +1239,7 @@ int i915_gem_init_hw(struct drm_i915_private *i915) > goto out; > } > > - ret = intel_wopcm_init_hw(&i915->wopcm, gt); > - if (ret) { > - DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); > - goto out; > - } > + intel_wopcm_init_hw(&i915->wopcm, gt); > > /* We can't enable contexts until all firmware is loaded */ > ret = intel_uc_init_hw(&i915->gt.uc); > @@ -1432,10 +1428,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) > return ret; > > intel_uc_fetch_firmwares(&dev_priv->gt.uc); > - > - ret = intel_wopcm_init(&dev_priv->wopcm); > - if (ret) > - goto err_uc_fw; > + intel_wopcm_init(&dev_priv->wopcm); > > /* This is just a security blanket to placate dragons. > * On some systems, we very sporadically observe that the first TLBs > @@ -1559,7 +1552,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) > intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); > mutex_unlock(&dev_priv->drm.struct_mutex); > > -err_uc_fw: > intel_uc_cleanup_firmwares(&dev_priv->gt.uc); > > if (ret != -EIO) { > diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c > index e173a8e61bfd..89b2ffef879a 100644 > --- a/drivers/gpu/drm/i915/intel_wopcm.c > +++ b/drivers/gpu/drm/i915/intel_wopcm.c > @@ -137,7 +137,11 @@ static inline int check_hw_restriction(struct drm_i915_private *i915, > u32 guc_wopcm_base, u32 guc_wopcm_size, > u32 huc_fw_size) > { > - int err = 0; > + int err; > + > + err = i915_inject_load_error(i915, -E2BIG); > + if (err) > + return err; > > if (IS_GEN(i915, 9)) > err = gen9_check_dword_gap(guc_wopcm_base, guc_wopcm_size); > @@ -156,12 +160,10 @@ static inline int check_hw_restriction(struct drm_i915_private *i915, > * This function will partition WOPCM space based on GuC and HuC firmware sizes > * and will allocate max remaining for use by GuC. This function will also > * enforce platform dependent hardware restrictions on GuC WOPCM offset and > - * size. It will fail the WOPCM init if any of these checks were failed, so that > - * the following GuC firmware uploading would be aborted. > - * > - * Return: 0 on success, non-zero error code on failure. > + * size. It will fail the WOPCM init if any of these checks fail, so that the > + * following WOPCM registers setup and GuC firmware uploading would be aborted. > */ > -int intel_wopcm_init(struct intel_wopcm *wopcm) > +void intel_wopcm_init(struct intel_wopcm *wopcm) > { > struct drm_i915_private *i915 = wopcm_to_i915(wopcm); > u32 guc_fw_size = intel_uc_fw_get_upload_size(&i915->gt.uc.guc.fw); > @@ -173,23 +175,23 @@ int intel_wopcm_init(struct intel_wopcm *wopcm) > int err; > > if (!USES_GUC(i915)) > - return 0; > + return; > > GEM_BUG_ON(!wopcm->size); > > if (i915_inject_probe_failure(i915)) > - return -E2BIG; > + return; > > if (guc_fw_size >= wopcm->size) { > DRM_ERROR("GuC FW (%uKiB) is too big to fit in WOPCM.", > guc_fw_size / 1024); > - return -E2BIG; > + goto fail; > } > > if (huc_fw_size >= wopcm->size) { > DRM_ERROR("HuC FW (%uKiB) is too big to fit in WOPCM.", > huc_fw_size / 1024); > - return -E2BIG; > + goto fail; > } > > guc_wopcm_base = ALIGN(huc_fw_size + WOPCM_RESERVED_SIZE, > @@ -197,7 +199,7 @@ int intel_wopcm_init(struct intel_wopcm *wopcm) > if ((guc_wopcm_base + ctx_rsvd) >= wopcm->size) { > DRM_ERROR("GuC WOPCM base (%uKiB) is too big.\n", > guc_wopcm_base / 1024); > - return -E2BIG; > + goto fail; > } > > guc_wopcm_size = wopcm->size - guc_wopcm_base - ctx_rsvd; > @@ -211,18 +213,19 @@ int intel_wopcm_init(struct intel_wopcm *wopcm) > DRM_ERROR("Need %uKiB WOPCM for GuC, %uKiB available.\n", > (guc_fw_size + guc_wopcm_rsvd) / 1024, > guc_wopcm_size / 1024); > - return -E2BIG; > + goto fail; > } > > err = check_hw_restriction(i915, guc_wopcm_base, guc_wopcm_size, > huc_fw_size); > if (err) > - return err; > + goto fail; > > wopcm->guc.base = guc_wopcm_base; > wopcm->guc.size = guc_wopcm_size; > - > - return 0; > + return; > +fail: > + I915_WARN(i915, "WOPCM partitioning failed, GuC will fail to load!\n"); > } > > static int > @@ -231,14 +234,25 @@ write_and_verify(struct intel_gt *gt, > { > struct intel_uncore *uncore = gt->uncore; > u32 reg_val; > + int err; > > GEM_BUG_ON(val & ~mask); > > + err = i915_inject_load_error(gt->i915, -EIO); > + if (err) > + return err; > + > intel_uncore_write(uncore, reg, val); > > reg_val = intel_uncore_read(uncore, reg); > > - return (reg_val & mask) != (val | locked_bit) ? -EIO : 0; > + if ((reg_val & mask) != (val | locked_bit)) { > + I915_WARN(gt->i915, "WOPCM register %#x=%#x\n", > + i915_mmio_reg_offset(reg), reg_val); > + return -EIO; > + } > + > + return 0; > } > > /** > @@ -249,19 +263,16 @@ write_and_verify(struct intel_gt *gt, > * Setup the GuC WOPCM size and offset registers with the calculated values. It > * will verify the register values to make sure the registers are locked with > * correct values. > - * > - * Return: 0 on success. -EIO if registers were locked with incorrect values. > */ > -int intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt) > +void intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt) > { > struct drm_i915_private *i915 = wopcm_to_i915(wopcm); > - struct intel_uncore *uncore = gt->uncore; > u32 huc_agent; > u32 mask; > int err; > > - if (!USES_GUC(i915)) > - return 0; > + if (!intel_wopcm_guc_size(wopcm)) > + return; > > GEM_BUG_ON(!HAS_GT_UC(i915)); > GEM_BUG_ON(!wopcm->guc.size); > @@ -281,14 +292,9 @@ int intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt) > if (err) > goto err_out; > > - return 0; > + wopcm->ready = true; > + return; > > err_out: > - DRM_ERROR("Failed to init WOPCM registers:\n"); > - DRM_ERROR("DMA_GUC_WOPCM_OFFSET=%#x\n", > - intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET)); > - DRM_ERROR("GUC_WOPCM_SIZE=%#x\n", > - intel_uncore_read(uncore, GUC_WOPCM_SIZE)); > - > - return err; > + I915_WARN(i915, "WOPCM programming failed, GuC will fail to load!\n"); > } > diff --git a/drivers/gpu/drm/i915/intel_wopcm.h b/drivers/gpu/drm/i915/intel_wopcm.h > index 56aaed4d64ff..daf9c1dbe20b 100644 > --- a/drivers/gpu/drm/i915/intel_wopcm.h > +++ b/drivers/gpu/drm/i915/intel_wopcm.h > @@ -17,6 +17,7 @@ struct intel_gt; > * @guc: GuC WOPCM Region info. > * @guc.base: GuC WOPCM base which is offset from WOPCM base. > * @guc.size: Size of the GuC WOPCM region. > + * @ready: indicates that WOPCM registers are correctly programmed. > */ > struct intel_wopcm { > u32 size; > @@ -24,6 +25,7 @@ struct intel_wopcm { > u32 base; > u32 size; > } guc; > + bool ready; > }; > > /** > @@ -42,7 +44,12 @@ static inline u32 intel_wopcm_guc_size(struct intel_wopcm *wopcm) > } > > void intel_wopcm_init_early(struct intel_wopcm *wopcm); > -int intel_wopcm_init(struct intel_wopcm *wopcm); > -int intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt); > +void intel_wopcm_init(struct intel_wopcm *wopcm); > +void intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt); > + > +static inline bool intel_wopcm_is_ready(struct intel_wopcm *wopcm) > +{ > + return wopcm->ready; > +} > > #endif >
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 9e1156c29cb1..f096063189d4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -410,6 +410,9 @@ int intel_uc_init_hw(struct intel_uc *uc) GEM_BUG_ON(!intel_uc_fw_supported(&guc->fw)); + if (!intel_wopcm_is_ready(&i915->wopcm)) + return -ENXIO; + guc_reset_interrupts(guc); /* WaEnableuKernelHeaderValidFix:skl */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c437ab5554ec..02e09864856f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1239,11 +1239,7 @@ int i915_gem_init_hw(struct drm_i915_private *i915) goto out; } - ret = intel_wopcm_init_hw(&i915->wopcm, gt); - if (ret) { - DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); - goto out; - } + intel_wopcm_init_hw(&i915->wopcm, gt); /* We can't enable contexts until all firmware is loaded */ ret = intel_uc_init_hw(&i915->gt.uc); @@ -1432,10 +1428,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) return ret; intel_uc_fetch_firmwares(&dev_priv->gt.uc); - - ret = intel_wopcm_init(&dev_priv->wopcm); - if (ret) - goto err_uc_fw; + intel_wopcm_init(&dev_priv->wopcm); /* This is just a security blanket to placate dragons. * On some systems, we very sporadically observe that the first TLBs @@ -1559,7 +1552,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); mutex_unlock(&dev_priv->drm.struct_mutex); -err_uc_fw: intel_uc_cleanup_firmwares(&dev_priv->gt.uc); if (ret != -EIO) { diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c index e173a8e61bfd..89b2ffef879a 100644 --- a/drivers/gpu/drm/i915/intel_wopcm.c +++ b/drivers/gpu/drm/i915/intel_wopcm.c @@ -137,7 +137,11 @@ static inline int check_hw_restriction(struct drm_i915_private *i915, u32 guc_wopcm_base, u32 guc_wopcm_size, u32 huc_fw_size) { - int err = 0; + int err; + + err = i915_inject_load_error(i915, -E2BIG); + if (err) + return err; if (IS_GEN(i915, 9)) err = gen9_check_dword_gap(guc_wopcm_base, guc_wopcm_size); @@ -156,12 +160,10 @@ static inline int check_hw_restriction(struct drm_i915_private *i915, * This function will partition WOPCM space based on GuC and HuC firmware sizes * and will allocate max remaining for use by GuC. This function will also * enforce platform dependent hardware restrictions on GuC WOPCM offset and - * size. It will fail the WOPCM init if any of these checks were failed, so that - * the following GuC firmware uploading would be aborted. - * - * Return: 0 on success, non-zero error code on failure. + * size. It will fail the WOPCM init if any of these checks fail, so that the + * following WOPCM registers setup and GuC firmware uploading would be aborted. */ -int intel_wopcm_init(struct intel_wopcm *wopcm) +void intel_wopcm_init(struct intel_wopcm *wopcm) { struct drm_i915_private *i915 = wopcm_to_i915(wopcm); u32 guc_fw_size = intel_uc_fw_get_upload_size(&i915->gt.uc.guc.fw); @@ -173,23 +175,23 @@ int intel_wopcm_init(struct intel_wopcm *wopcm) int err; if (!USES_GUC(i915)) - return 0; + return; GEM_BUG_ON(!wopcm->size); if (i915_inject_probe_failure(i915)) - return -E2BIG; + return; if (guc_fw_size >= wopcm->size) { DRM_ERROR("GuC FW (%uKiB) is too big to fit in WOPCM.", guc_fw_size / 1024); - return -E2BIG; + goto fail; } if (huc_fw_size >= wopcm->size) { DRM_ERROR("HuC FW (%uKiB) is too big to fit in WOPCM.", huc_fw_size / 1024); - return -E2BIG; + goto fail; } guc_wopcm_base = ALIGN(huc_fw_size + WOPCM_RESERVED_SIZE, @@ -197,7 +199,7 @@ int intel_wopcm_init(struct intel_wopcm *wopcm) if ((guc_wopcm_base + ctx_rsvd) >= wopcm->size) { DRM_ERROR("GuC WOPCM base (%uKiB) is too big.\n", guc_wopcm_base / 1024); - return -E2BIG; + goto fail; } guc_wopcm_size = wopcm->size - guc_wopcm_base - ctx_rsvd; @@ -211,18 +213,19 @@ int intel_wopcm_init(struct intel_wopcm *wopcm) DRM_ERROR("Need %uKiB WOPCM for GuC, %uKiB available.\n", (guc_fw_size + guc_wopcm_rsvd) / 1024, guc_wopcm_size / 1024); - return -E2BIG; + goto fail; } err = check_hw_restriction(i915, guc_wopcm_base, guc_wopcm_size, huc_fw_size); if (err) - return err; + goto fail; wopcm->guc.base = guc_wopcm_base; wopcm->guc.size = guc_wopcm_size; - - return 0; + return; +fail: + I915_WARN(i915, "WOPCM partitioning failed, GuC will fail to load!\n"); } static int @@ -231,14 +234,25 @@ write_and_verify(struct intel_gt *gt, { struct intel_uncore *uncore = gt->uncore; u32 reg_val; + int err; GEM_BUG_ON(val & ~mask); + err = i915_inject_load_error(gt->i915, -EIO); + if (err) + return err; + intel_uncore_write(uncore, reg, val); reg_val = intel_uncore_read(uncore, reg); - return (reg_val & mask) != (val | locked_bit) ? -EIO : 0; + if ((reg_val & mask) != (val | locked_bit)) { + I915_WARN(gt->i915, "WOPCM register %#x=%#x\n", + i915_mmio_reg_offset(reg), reg_val); + return -EIO; + } + + return 0; } /** @@ -249,19 +263,16 @@ write_and_verify(struct intel_gt *gt, * Setup the GuC WOPCM size and offset registers with the calculated values. It * will verify the register values to make sure the registers are locked with * correct values. - * - * Return: 0 on success. -EIO if registers were locked with incorrect values. */ -int intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt) +void intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt) { struct drm_i915_private *i915 = wopcm_to_i915(wopcm); - struct intel_uncore *uncore = gt->uncore; u32 huc_agent; u32 mask; int err; - if (!USES_GUC(i915)) - return 0; + if (!intel_wopcm_guc_size(wopcm)) + return; GEM_BUG_ON(!HAS_GT_UC(i915)); GEM_BUG_ON(!wopcm->guc.size); @@ -281,14 +292,9 @@ int intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt) if (err) goto err_out; - return 0; + wopcm->ready = true; + return; err_out: - DRM_ERROR("Failed to init WOPCM registers:\n"); - DRM_ERROR("DMA_GUC_WOPCM_OFFSET=%#x\n", - intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET)); - DRM_ERROR("GUC_WOPCM_SIZE=%#x\n", - intel_uncore_read(uncore, GUC_WOPCM_SIZE)); - - return err; + I915_WARN(i915, "WOPCM programming failed, GuC will fail to load!\n"); } diff --git a/drivers/gpu/drm/i915/intel_wopcm.h b/drivers/gpu/drm/i915/intel_wopcm.h index 56aaed4d64ff..daf9c1dbe20b 100644 --- a/drivers/gpu/drm/i915/intel_wopcm.h +++ b/drivers/gpu/drm/i915/intel_wopcm.h @@ -17,6 +17,7 @@ struct intel_gt; * @guc: GuC WOPCM Region info. * @guc.base: GuC WOPCM base which is offset from WOPCM base. * @guc.size: Size of the GuC WOPCM region. + * @ready: indicates that WOPCM registers are correctly programmed. */ struct intel_wopcm { u32 size; @@ -24,6 +25,7 @@ struct intel_wopcm { u32 base; u32 size; } guc; + bool ready; }; /** @@ -42,7 +44,12 @@ static inline u32 intel_wopcm_guc_size(struct intel_wopcm *wopcm) } void intel_wopcm_init_early(struct intel_wopcm *wopcm); -int intel_wopcm_init(struct intel_wopcm *wopcm); -int intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt); +void intel_wopcm_init(struct intel_wopcm *wopcm); +void intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt); + +static inline bool intel_wopcm_is_ready(struct intel_wopcm *wopcm) +{ + return wopcm->ready; +} #endif
We shouldn't immediately fail on WOPCM partitioning or programming as we plan to restore fallback on any GuC related failures. While around, add some more probe failure injections. Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 3 ++ drivers/gpu/drm/i915/i915_gem.c | 12 +---- drivers/gpu/drm/i915/intel_wopcm.c | 66 +++++++++++++++------------ drivers/gpu/drm/i915/intel_wopcm.h | 11 ++++- 4 files changed, 50 insertions(+), 42 deletions(-)