diff mbox series

[4/4] drm/i915/wopcm: Don't fail on WOPCM partitioning failure

Message ID 20190729152301.22588-5-michal.wajdeczko@intel.com (mailing list archive)
State New, archived
Headers show
Series add more probe failures | expand

Commit Message

Michal Wajdeczko July 29, 2019, 3:23 p.m. UTC
We shouldn't immediately fail on WOPCM partitioning or programming
as we plan to restore fallback on any GuC related failures.

While around, add some more probe failure injections.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |  3 ++
 drivers/gpu/drm/i915/i915_gem.c       | 12 +----
 drivers/gpu/drm/i915/intel_wopcm.c    | 66 +++++++++++++++------------
 drivers/gpu/drm/i915/intel_wopcm.h    | 11 ++++-
 4 files changed, 50 insertions(+), 42 deletions(-)

Comments

Daniele Ceraolo Spurio July 29, 2019, 6:04 p.m. UTC | #1
On 7/29/19 8:23 AM, Michal Wajdeczko wrote:
> We shouldn't immediately fail on WOPCM partitioning or programming
> as we plan to restore fallback on any GuC related failures.
> 
> While around, add some more probe failure injections.
> 

I was planning to move the uC wopcm programming to intel_uc_init_hw() 
(where you now have intel_wopcm_is_ready) since we're actually not 
initializing any wopcm HW but uC HW related to how/where the uC accesses 
the wopcm. That would allow us to return the failure directly from the 
function without having to save state. Thoughts?

Daniele

> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/uc/intel_uc.c |  3 ++
>   drivers/gpu/drm/i915/i915_gem.c       | 12 +----
>   drivers/gpu/drm/i915/intel_wopcm.c    | 66 +++++++++++++++------------
>   drivers/gpu/drm/i915/intel_wopcm.h    | 11 ++++-
>   4 files changed, 50 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index 9e1156c29cb1..f096063189d4 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -410,6 +410,9 @@ int intel_uc_init_hw(struct intel_uc *uc)
>   
>   	GEM_BUG_ON(!intel_uc_fw_supported(&guc->fw));
>   
> +	if (!intel_wopcm_is_ready(&i915->wopcm))
> +		return -ENXIO;
> +
>   	guc_reset_interrupts(guc);
>   
>   	/* WaEnableuKernelHeaderValidFix:skl */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index c437ab5554ec..02e09864856f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1239,11 +1239,7 @@ int i915_gem_init_hw(struct drm_i915_private *i915)
>   		goto out;
>   	}
>   
> -	ret = intel_wopcm_init_hw(&i915->wopcm, gt);
> -	if (ret) {
> -		DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
> -		goto out;
> -	}
> +	intel_wopcm_init_hw(&i915->wopcm, gt);
>   
>   	/* We can't enable contexts until all firmware is loaded */
>   	ret = intel_uc_init_hw(&i915->gt.uc);
> @@ -1432,10 +1428,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   		return ret;
>   
>   	intel_uc_fetch_firmwares(&dev_priv->gt.uc);
> -
> -	ret = intel_wopcm_init(&dev_priv->wopcm);
> -	if (ret)
> -		goto err_uc_fw;
> +	intel_wopcm_init(&dev_priv->wopcm);
>   
>   	/* This is just a security blanket to placate dragons.
>   	 * On some systems, we very sporadically observe that the first TLBs
> @@ -1559,7 +1552,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
>   
> -err_uc_fw:
>   	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
>   
>   	if (ret != -EIO) {
> diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c
> index e173a8e61bfd..89b2ffef879a 100644
> --- a/drivers/gpu/drm/i915/intel_wopcm.c
> +++ b/drivers/gpu/drm/i915/intel_wopcm.c
> @@ -137,7 +137,11 @@ static inline int check_hw_restriction(struct drm_i915_private *i915,
>   				       u32 guc_wopcm_base, u32 guc_wopcm_size,
>   				       u32 huc_fw_size)
>   {
> -	int err = 0;
> +	int err;
> +
> +	err = i915_inject_load_error(i915, -E2BIG);
> +	if (err)
> +		return err;
>   
>   	if (IS_GEN(i915, 9))
>   		err = gen9_check_dword_gap(guc_wopcm_base, guc_wopcm_size);
> @@ -156,12 +160,10 @@ static inline int check_hw_restriction(struct drm_i915_private *i915,
>    * This function will partition WOPCM space based on GuC and HuC firmware sizes
>    * and will allocate max remaining for use by GuC. This function will also
>    * enforce platform dependent hardware restrictions on GuC WOPCM offset and
> - * size. It will fail the WOPCM init if any of these checks were failed, so that
> - * the following GuC firmware uploading would be aborted.
> - *
> - * Return: 0 on success, non-zero error code on failure.
> + * size. It will fail the WOPCM init if any of these checks fail, so that the
> + * following WOPCM registers setup and GuC firmware uploading would be aborted.
>    */
> -int intel_wopcm_init(struct intel_wopcm *wopcm)
> +void intel_wopcm_init(struct intel_wopcm *wopcm)
>   {
>   	struct drm_i915_private *i915 = wopcm_to_i915(wopcm);
>   	u32 guc_fw_size = intel_uc_fw_get_upload_size(&i915->gt.uc.guc.fw);
> @@ -173,23 +175,23 @@ int intel_wopcm_init(struct intel_wopcm *wopcm)
>   	int err;
>   
>   	if (!USES_GUC(i915))
> -		return 0;
> +		return;
>   
>   	GEM_BUG_ON(!wopcm->size);
>   
>   	if (i915_inject_probe_failure(i915))
> -		return -E2BIG;
> +		return;
>   
>   	if (guc_fw_size >= wopcm->size) {
>   		DRM_ERROR("GuC FW (%uKiB) is too big to fit in WOPCM.",
>   			  guc_fw_size / 1024);
> -		return -E2BIG;
> +		goto fail;
>   	}
>   
>   	if (huc_fw_size >= wopcm->size) {
>   		DRM_ERROR("HuC FW (%uKiB) is too big to fit in WOPCM.",
>   			  huc_fw_size / 1024);
> -		return -E2BIG;
> +		goto fail;
>   	}
>   
>   	guc_wopcm_base = ALIGN(huc_fw_size + WOPCM_RESERVED_SIZE,
> @@ -197,7 +199,7 @@ int intel_wopcm_init(struct intel_wopcm *wopcm)
>   	if ((guc_wopcm_base + ctx_rsvd) >= wopcm->size) {
>   		DRM_ERROR("GuC WOPCM base (%uKiB) is too big.\n",
>   			  guc_wopcm_base / 1024);
> -		return -E2BIG;
> +		goto fail;
>   	}
>   
>   	guc_wopcm_size = wopcm->size - guc_wopcm_base - ctx_rsvd;
> @@ -211,18 +213,19 @@ int intel_wopcm_init(struct intel_wopcm *wopcm)
>   		DRM_ERROR("Need %uKiB WOPCM for GuC, %uKiB available.\n",
>   			  (guc_fw_size + guc_wopcm_rsvd) / 1024,
>   			  guc_wopcm_size / 1024);
> -		return -E2BIG;
> +		goto fail;
>   	}
>   
>   	err = check_hw_restriction(i915, guc_wopcm_base, guc_wopcm_size,
>   				   huc_fw_size);
>   	if (err)
> -		return err;
> +		goto fail;
>   
>   	wopcm->guc.base = guc_wopcm_base;
>   	wopcm->guc.size = guc_wopcm_size;
> -
> -	return 0;
> +	return;
> +fail:
> +	I915_WARN(i915, "WOPCM partitioning failed, GuC will fail to load!\n");
>   }
>   
>   static int
> @@ -231,14 +234,25 @@ write_and_verify(struct intel_gt *gt,
>   {
>   	struct intel_uncore *uncore = gt->uncore;
>   	u32 reg_val;
> +	int err;
>   
>   	GEM_BUG_ON(val & ~mask);
>   
> +	err = i915_inject_load_error(gt->i915, -EIO);
> +	if (err)
> +		return err;
> +
>   	intel_uncore_write(uncore, reg, val);
>   
>   	reg_val = intel_uncore_read(uncore, reg);
>   
> -	return (reg_val & mask) != (val | locked_bit) ? -EIO : 0;
> +	if ((reg_val & mask) != (val | locked_bit)) {
> +		I915_WARN(gt->i915, "WOPCM register %#x=%#x\n",
> +			  i915_mmio_reg_offset(reg), reg_val);
> +		return -EIO;
> +	}
> +
> +	return 0;
>   }
>   
>   /**
> @@ -249,19 +263,16 @@ write_and_verify(struct intel_gt *gt,
>    * Setup the GuC WOPCM size and offset registers with the calculated values. It
>    * will verify the register values to make sure the registers are locked with
>    * correct values.
> - *
> - * Return: 0 on success. -EIO if registers were locked with incorrect values.
>    */
> -int intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt)
> +void intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt)
>   {
>   	struct drm_i915_private *i915 = wopcm_to_i915(wopcm);
> -	struct intel_uncore *uncore = gt->uncore;
>   	u32 huc_agent;
>   	u32 mask;
>   	int err;
>   
> -	if (!USES_GUC(i915))
> -		return 0;
> +	if (!intel_wopcm_guc_size(wopcm))
> +		return;
>   
>   	GEM_BUG_ON(!HAS_GT_UC(i915));
>   	GEM_BUG_ON(!wopcm->guc.size);
> @@ -281,14 +292,9 @@ int intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt)
>   	if (err)
>   		goto err_out;
>   
> -	return 0;
> +	wopcm->ready = true;
> +	return;
>   
>   err_out:
> -	DRM_ERROR("Failed to init WOPCM registers:\n");
> -	DRM_ERROR("DMA_GUC_WOPCM_OFFSET=%#x\n",
> -		  intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET));
> -	DRM_ERROR("GUC_WOPCM_SIZE=%#x\n",
> -		  intel_uncore_read(uncore, GUC_WOPCM_SIZE));
> -
> -	return err;
> +	I915_WARN(i915, "WOPCM programming failed, GuC will fail to load!\n");
>   }
> diff --git a/drivers/gpu/drm/i915/intel_wopcm.h b/drivers/gpu/drm/i915/intel_wopcm.h
> index 56aaed4d64ff..daf9c1dbe20b 100644
> --- a/drivers/gpu/drm/i915/intel_wopcm.h
> +++ b/drivers/gpu/drm/i915/intel_wopcm.h
> @@ -17,6 +17,7 @@ struct intel_gt;
>    * @guc: GuC WOPCM Region info.
>    * @guc.base: GuC WOPCM base which is offset from WOPCM base.
>    * @guc.size: Size of the GuC WOPCM region.
> + * @ready: indicates that WOPCM registers are correctly programmed.
>    */
>   struct intel_wopcm {
>   	u32 size;
> @@ -24,6 +25,7 @@ struct intel_wopcm {
>   		u32 base;
>   		u32 size;
>   	} guc;
> +	bool ready;
>   };
>   
>   /**
> @@ -42,7 +44,12 @@ static inline u32 intel_wopcm_guc_size(struct intel_wopcm *wopcm)
>   }
>   
>   void intel_wopcm_init_early(struct intel_wopcm *wopcm);
> -int intel_wopcm_init(struct intel_wopcm *wopcm);
> -int intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt);
> +void intel_wopcm_init(struct intel_wopcm *wopcm);
> +void intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt);
> +
> +static inline bool intel_wopcm_is_ready(struct intel_wopcm *wopcm)
> +{
> +	return wopcm->ready;
> +}
>   
>   #endif
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 9e1156c29cb1..f096063189d4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -410,6 +410,9 @@  int intel_uc_init_hw(struct intel_uc *uc)
 
 	GEM_BUG_ON(!intel_uc_fw_supported(&guc->fw));
 
+	if (!intel_wopcm_is_ready(&i915->wopcm))
+		return -ENXIO;
+
 	guc_reset_interrupts(guc);
 
 	/* WaEnableuKernelHeaderValidFix:skl */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c437ab5554ec..02e09864856f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1239,11 +1239,7 @@  int i915_gem_init_hw(struct drm_i915_private *i915)
 		goto out;
 	}
 
-	ret = intel_wopcm_init_hw(&i915->wopcm, gt);
-	if (ret) {
-		DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
-		goto out;
-	}
+	intel_wopcm_init_hw(&i915->wopcm, gt);
 
 	/* We can't enable contexts until all firmware is loaded */
 	ret = intel_uc_init_hw(&i915->gt.uc);
@@ -1432,10 +1428,7 @@  int i915_gem_init(struct drm_i915_private *dev_priv)
 		return ret;
 
 	intel_uc_fetch_firmwares(&dev_priv->gt.uc);
-
-	ret = intel_wopcm_init(&dev_priv->wopcm);
-	if (ret)
-		goto err_uc_fw;
+	intel_wopcm_init(&dev_priv->wopcm);
 
 	/* This is just a security blanket to placate dragons.
 	 * On some systems, we very sporadically observe that the first TLBs
@@ -1559,7 +1552,6 @@  int i915_gem_init(struct drm_i915_private *dev_priv)
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-err_uc_fw:
 	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
 
 	if (ret != -EIO) {
diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c
index e173a8e61bfd..89b2ffef879a 100644
--- a/drivers/gpu/drm/i915/intel_wopcm.c
+++ b/drivers/gpu/drm/i915/intel_wopcm.c
@@ -137,7 +137,11 @@  static inline int check_hw_restriction(struct drm_i915_private *i915,
 				       u32 guc_wopcm_base, u32 guc_wopcm_size,
 				       u32 huc_fw_size)
 {
-	int err = 0;
+	int err;
+
+	err = i915_inject_load_error(i915, -E2BIG);
+	if (err)
+		return err;
 
 	if (IS_GEN(i915, 9))
 		err = gen9_check_dword_gap(guc_wopcm_base, guc_wopcm_size);
@@ -156,12 +160,10 @@  static inline int check_hw_restriction(struct drm_i915_private *i915,
  * This function will partition WOPCM space based on GuC and HuC firmware sizes
  * and will allocate max remaining for use by GuC. This function will also
  * enforce platform dependent hardware restrictions on GuC WOPCM offset and
- * size. It will fail the WOPCM init if any of these checks were failed, so that
- * the following GuC firmware uploading would be aborted.
- *
- * Return: 0 on success, non-zero error code on failure.
+ * size. It will fail the WOPCM init if any of these checks fail, so that the
+ * following WOPCM registers setup and GuC firmware uploading would be aborted.
  */
-int intel_wopcm_init(struct intel_wopcm *wopcm)
+void intel_wopcm_init(struct intel_wopcm *wopcm)
 {
 	struct drm_i915_private *i915 = wopcm_to_i915(wopcm);
 	u32 guc_fw_size = intel_uc_fw_get_upload_size(&i915->gt.uc.guc.fw);
@@ -173,23 +175,23 @@  int intel_wopcm_init(struct intel_wopcm *wopcm)
 	int err;
 
 	if (!USES_GUC(i915))
-		return 0;
+		return;
 
 	GEM_BUG_ON(!wopcm->size);
 
 	if (i915_inject_probe_failure(i915))
-		return -E2BIG;
+		return;
 
 	if (guc_fw_size >= wopcm->size) {
 		DRM_ERROR("GuC FW (%uKiB) is too big to fit in WOPCM.",
 			  guc_fw_size / 1024);
-		return -E2BIG;
+		goto fail;
 	}
 
 	if (huc_fw_size >= wopcm->size) {
 		DRM_ERROR("HuC FW (%uKiB) is too big to fit in WOPCM.",
 			  huc_fw_size / 1024);
-		return -E2BIG;
+		goto fail;
 	}
 
 	guc_wopcm_base = ALIGN(huc_fw_size + WOPCM_RESERVED_SIZE,
@@ -197,7 +199,7 @@  int intel_wopcm_init(struct intel_wopcm *wopcm)
 	if ((guc_wopcm_base + ctx_rsvd) >= wopcm->size) {
 		DRM_ERROR("GuC WOPCM base (%uKiB) is too big.\n",
 			  guc_wopcm_base / 1024);
-		return -E2BIG;
+		goto fail;
 	}
 
 	guc_wopcm_size = wopcm->size - guc_wopcm_base - ctx_rsvd;
@@ -211,18 +213,19 @@  int intel_wopcm_init(struct intel_wopcm *wopcm)
 		DRM_ERROR("Need %uKiB WOPCM for GuC, %uKiB available.\n",
 			  (guc_fw_size + guc_wopcm_rsvd) / 1024,
 			  guc_wopcm_size / 1024);
-		return -E2BIG;
+		goto fail;
 	}
 
 	err = check_hw_restriction(i915, guc_wopcm_base, guc_wopcm_size,
 				   huc_fw_size);
 	if (err)
-		return err;
+		goto fail;
 
 	wopcm->guc.base = guc_wopcm_base;
 	wopcm->guc.size = guc_wopcm_size;
-
-	return 0;
+	return;
+fail:
+	I915_WARN(i915, "WOPCM partitioning failed, GuC will fail to load!\n");
 }
 
 static int
@@ -231,14 +234,25 @@  write_and_verify(struct intel_gt *gt,
 {
 	struct intel_uncore *uncore = gt->uncore;
 	u32 reg_val;
+	int err;
 
 	GEM_BUG_ON(val & ~mask);
 
+	err = i915_inject_load_error(gt->i915, -EIO);
+	if (err)
+		return err;
+
 	intel_uncore_write(uncore, reg, val);
 
 	reg_val = intel_uncore_read(uncore, reg);
 
-	return (reg_val & mask) != (val | locked_bit) ? -EIO : 0;
+	if ((reg_val & mask) != (val | locked_bit)) {
+		I915_WARN(gt->i915, "WOPCM register %#x=%#x\n",
+			  i915_mmio_reg_offset(reg), reg_val);
+		return -EIO;
+	}
+
+	return 0;
 }
 
 /**
@@ -249,19 +263,16 @@  write_and_verify(struct intel_gt *gt,
  * Setup the GuC WOPCM size and offset registers with the calculated values. It
  * will verify the register values to make sure the registers are locked with
  * correct values.
- *
- * Return: 0 on success. -EIO if registers were locked with incorrect values.
  */
-int intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt)
+void intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt)
 {
 	struct drm_i915_private *i915 = wopcm_to_i915(wopcm);
-	struct intel_uncore *uncore = gt->uncore;
 	u32 huc_agent;
 	u32 mask;
 	int err;
 
-	if (!USES_GUC(i915))
-		return 0;
+	if (!intel_wopcm_guc_size(wopcm))
+		return;
 
 	GEM_BUG_ON(!HAS_GT_UC(i915));
 	GEM_BUG_ON(!wopcm->guc.size);
@@ -281,14 +292,9 @@  int intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt)
 	if (err)
 		goto err_out;
 
-	return 0;
+	wopcm->ready = true;
+	return;
 
 err_out:
-	DRM_ERROR("Failed to init WOPCM registers:\n");
-	DRM_ERROR("DMA_GUC_WOPCM_OFFSET=%#x\n",
-		  intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET));
-	DRM_ERROR("GUC_WOPCM_SIZE=%#x\n",
-		  intel_uncore_read(uncore, GUC_WOPCM_SIZE));
-
-	return err;
+	I915_WARN(i915, "WOPCM programming failed, GuC will fail to load!\n");
 }
diff --git a/drivers/gpu/drm/i915/intel_wopcm.h b/drivers/gpu/drm/i915/intel_wopcm.h
index 56aaed4d64ff..daf9c1dbe20b 100644
--- a/drivers/gpu/drm/i915/intel_wopcm.h
+++ b/drivers/gpu/drm/i915/intel_wopcm.h
@@ -17,6 +17,7 @@  struct intel_gt;
  * @guc: GuC WOPCM Region info.
  * @guc.base: GuC WOPCM base which is offset from WOPCM base.
  * @guc.size: Size of the GuC WOPCM region.
+ * @ready: indicates that WOPCM registers are correctly programmed.
  */
 struct intel_wopcm {
 	u32 size;
@@ -24,6 +25,7 @@  struct intel_wopcm {
 		u32 base;
 		u32 size;
 	} guc;
+	bool ready;
 };
 
 /**
@@ -42,7 +44,12 @@  static inline u32 intel_wopcm_guc_size(struct intel_wopcm *wopcm)
 }
 
 void intel_wopcm_init_early(struct intel_wopcm *wopcm);
-int intel_wopcm_init(struct intel_wopcm *wopcm);
-int intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt);
+void intel_wopcm_init(struct intel_wopcm *wopcm);
+void intel_wopcm_init_hw(struct intel_wopcm *wopcm, struct intel_gt *gt);
+
+static inline bool intel_wopcm_is_ready(struct intel_wopcm *wopcm)
+{
+	return wopcm->ready;
+}
 
 #endif