diff mbox

[2/7] drm/i915/guc: always reset GuC before loading firmware

Message ID 1458555400-22859-3-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon March 21, 2016, 10:16 a.m. UTC
After a suspend-resume cycle, the resumed kernel has no idea what the
booted kernel may have done to the GuC before replacing itself with the
resumed image. In particular, it may have already loaded the GuC with
firmware, which will then cause this kernel's attempt to (re)load the
firmware to fail (GuC program memory is write-once!).

So here we choose to always reset the GuC just before (re)loading the
firmware, so the hardware should then be in a well-known state, and we
may even avoid some of the issues arising from unpredictable timing.

Also added some more fields & values to the definition of the GUC_STATUS
register, which is the key diagnostic indicator if the GuC load fails.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Cc: Alex Dai <yu.dai@intel.com>

---
 drivers/gpu/drm/i915/i915_guc_reg.h     | 12 ++++++++--
 drivers/gpu/drm/i915/intel_guc_loader.c | 40 ++++++++++++++++-----------------
 2 files changed, 29 insertions(+), 23 deletions(-)

Comments

arun.siluvery@linux.intel.com March 22, 2016, 1:32 p.m. UTC | #1
On 21/03/2016 10:16, Dave Gordon wrote:
> After a suspend-resume cycle, the resumed kernel has no idea what the
> booted kernel may have done to the GuC before replacing itself with the
> resumed image. In particular, it may have already loaded the GuC with
> firmware, which will then cause this kernel's attempt to (re)load the
> firmware to fail (GuC program memory is write-once!).
>
> So here we choose to always reset the GuC just before (re)loading the
> firmware, so the hardware should then be in a well-known state, and we
> may even avoid some of the issues arising from unpredictable timing.
>
> Also added some more fields & values to the definition of the GUC_STATUS
> register, which is the key diagnostic indicator if the GuC load fails.
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
> Cc: Alex Dai <yu.dai@intel.com>
>
Please add bugzilla reference.

> ---
>   drivers/gpu/drm/i915/i915_guc_reg.h     | 12 ++++++++--
>   drivers/gpu/drm/i915/intel_guc_loader.c | 40 ++++++++++++++++-----------------
>   2 files changed, 29 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h
> index 94ceee5..80786d9 100644
> --- a/drivers/gpu/drm/i915/i915_guc_reg.h
> +++ b/drivers/gpu/drm/i915/i915_guc_reg.h
> @@ -27,10 +27,12 @@
>   /* Definitions of GuC H/W registers, bits, etc */
>
>   #define GUC_STATUS			_MMIO(0xc000)
> -#define   GS_MIA_IN_RESET		(1 << 0)
> +#define   GS_RESET_SHIFT		0
> +#define   GS_MIA_IN_RESET		  (0x01 << GS_RESET_SHIFT)
>   #define   GS_BOOTROM_SHIFT		1
>   #define   GS_BOOTROM_MASK		  (0x7F << GS_BOOTROM_SHIFT)
>   #define   GS_BOOTROM_RSA_FAILED		  (0x50 << GS_BOOTROM_SHIFT)
> +#define   GS_BOOTROM_JUMP_PASSED	  (0x76 << GS_BOOTROM_SHIFT)
>   #define   GS_UKERNEL_SHIFT		8
>   #define   GS_UKERNEL_MASK		  (0xFF << GS_UKERNEL_SHIFT)
>   #define   GS_UKERNEL_LAPIC_DONE		  (0x30 << GS_UKERNEL_SHIFT)
> @@ -38,7 +40,13 @@
>   #define   GS_UKERNEL_READY		  (0xF0 << GS_UKERNEL_SHIFT)
>   #define   GS_MIA_SHIFT			16
>   #define   GS_MIA_MASK			  (0x07 << GS_MIA_SHIFT)
> -#define   GS_MIA_CORE_STATE		  (1 << GS_MIA_SHIFT)
> +#define   GS_MIA_CORE_STATE		  (0x01 << GS_MIA_SHIFT)
> +#define   GS_MIA_HALT_REQUESTED		  (0x02 << GS_MIA_SHIFT)
> +#define   GS_MIA_ISR_ENTRY		  (0x04 << GS_MIA_SHIFT)
> +#define   GS_AUTH_STATUS_SHIFT		30
> +#define   GS_AUTH_STATUS_MASK		  (0x03 << GS_AUTH_STATUS_SHIFT)
> +#define   GS_AUTH_STATUS_BAD		  (0x01 << GS_AUTH_STATUS_SHIFT)
> +#define   GS_AUTH_STATUS_GOOD		  (0x02 << GS_AUTH_STATUS_SHIFT)
>
>   #define SOFT_SCRATCH(n)			_MMIO(0xc180 + (n) * 4)
>   #define SOFT_SCRATCH_COUNT		16
> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
> index a07c228..a875936 100644
> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
> @@ -387,7 +387,7 @@ int intel_guc_ucode_load(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
> -	int err = 0;
> +	int retries, err = 0;
>
>   	if (!i915.enable_guc_submission)
>   		return 0;
> @@ -441,29 +441,26 @@ int intel_guc_ucode_load(struct drm_device *dev)
>   	 * steppings also so this is extended as well.
>   	 */
>   	/* WaEnableGuCBootHashCheckNotSet:skl,bxt */
> -	err = guc_ucode_xfer(dev_priv);
> -	if (err) {
> -		int retries = 3;
> -
> -		DRM_ERROR("GuC fw load failed, err=%d, attempting reset and retry\n", err);
> -
> -		while (retries--) {
> -			err = i915_reset_guc(dev_priv);
> -			if (err)
> -				break;
> -
> -			err = guc_ucode_xfer(dev_priv);
> -			if (!err) {
> -				DRM_DEBUG_DRIVER("GuC fw reload succeeded after reset\n");
> -				break;
> -			}
> -			DRM_DEBUG_DRIVER("GuC fw reload retries left: %d\n", retries);
> -		}
> -
> +	for (retries = 3; ; ) {
> +		/*
> +		 * Always reset the GuC just before (re)loading, so
> +		 * that the state and timing are fairly predictable
> +		 */
> +		err = i915_reset_guc(dev_priv);
>   		if (err) {
> -			DRM_ERROR("GuC fw reload attempt failed, ret=%d\n", err);
> +			DRM_ERROR("GuC reset failed, err %d\n", err);
>   			goto fail;
>   		}
> +
> +		err = guc_ucode_xfer(dev_priv);
> +		if (!err)
> +			break;
> +
> +		if (--retries == 0)
> +			goto fail;
> +
> +		DRM_INFO("GuC fw load failed, err %d; will reset and "
> +			"retry %d more time(s)\n", err, retries);
>   	}
>
>   	guc_fw->guc_fw_load_status = GUC_FIRMWARE_SUCCESS;
> @@ -485,6 +482,7 @@ int intel_guc_ucode_load(struct drm_device *dev)
>   	return 0;
>
>   fail:
> +	DRM_ERROR("GuC firmware load failed, err %d\n", err);
>   	if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING)
>   		guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL;
>
>
looks good to me,
Reviewed-by: Arun Siluvery <arun.siluvery@linux.intel.com>

regards
Arun
arun.siluvery@linux.intel.com March 22, 2016, 6:42 p.m. UTC | #2
On 22/03/2016 13:32, Arun Siluvery wrote:
> On 21/03/2016 10:16, Dave Gordon wrote:
>> After a suspend-resume cycle, the resumed kernel has no idea what the
>> booted kernel may have done to the GuC before replacing itself with the
>> resumed image. In particular, it may have already loaded the GuC with
>> firmware, which will then cause this kernel's attempt to (re)load the
>> firmware to fail (GuC program memory is write-once!).
>>
>> So here we choose to always reset the GuC just before (re)loading the
>> firmware, so the hardware should then be in a well-known state, and we
>> may even avoid some of the issues arising from unpredictable timing.
>>
>> Also added some more fields & values to the definition of the GUC_STATUS
>> register, which is the key diagnostic indicator if the GuC load fails.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
>> Cc: Alex Dai <yu.dai@intel.com>
>>
> Please add bugzilla reference.

https://bugs.freedesktop.org/show_bug.cgi?id=94390

regards
Arun

>
>> ---
>>   drivers/gpu/drm/i915/i915_guc_reg.h     | 12 ++++++++--
>>   drivers/gpu/drm/i915/intel_guc_loader.c | 40
>> ++++++++++++++++-----------------
>>   2 files changed, 29 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h
>> b/drivers/gpu/drm/i915/i915_guc_reg.h
>> index 94ceee5..80786d9 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_guc_reg.h
>> @@ -27,10 +27,12 @@
>>   /* Definitions of GuC H/W registers, bits, etc */
>>
>>   #define GUC_STATUS            _MMIO(0xc000)
>> -#define   GS_MIA_IN_RESET        (1 << 0)
>> +#define   GS_RESET_SHIFT        0
>> +#define   GS_MIA_IN_RESET          (0x01 << GS_RESET_SHIFT)
>>   #define   GS_BOOTROM_SHIFT        1
>>   #define   GS_BOOTROM_MASK          (0x7F << GS_BOOTROM_SHIFT)
>>   #define   GS_BOOTROM_RSA_FAILED          (0x50 << GS_BOOTROM_SHIFT)
>> +#define   GS_BOOTROM_JUMP_PASSED      (0x76 << GS_BOOTROM_SHIFT)
>>   #define   GS_UKERNEL_SHIFT        8
>>   #define   GS_UKERNEL_MASK          (0xFF << GS_UKERNEL_SHIFT)
>>   #define   GS_UKERNEL_LAPIC_DONE          (0x30 << GS_UKERNEL_SHIFT)
>> @@ -38,7 +40,13 @@
>>   #define   GS_UKERNEL_READY          (0xF0 << GS_UKERNEL_SHIFT)
>>   #define   GS_MIA_SHIFT            16
>>   #define   GS_MIA_MASK              (0x07 << GS_MIA_SHIFT)
>> -#define   GS_MIA_CORE_STATE          (1 << GS_MIA_SHIFT)
>> +#define   GS_MIA_CORE_STATE          (0x01 << GS_MIA_SHIFT)
>> +#define   GS_MIA_HALT_REQUESTED          (0x02 << GS_MIA_SHIFT)
>> +#define   GS_MIA_ISR_ENTRY          (0x04 << GS_MIA_SHIFT)
>> +#define   GS_AUTH_STATUS_SHIFT        30
>> +#define   GS_AUTH_STATUS_MASK          (0x03 << GS_AUTH_STATUS_SHIFT)
>> +#define   GS_AUTH_STATUS_BAD          (0x01 << GS_AUTH_STATUS_SHIFT)
>> +#define   GS_AUTH_STATUS_GOOD          (0x02 << GS_AUTH_STATUS_SHIFT)
>>
>>   #define SOFT_SCRATCH(n)            _MMIO(0xc180 + (n) * 4)
>>   #define SOFT_SCRATCH_COUNT        16
>> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c
>> b/drivers/gpu/drm/i915/intel_guc_loader.c
>> index a07c228..a875936 100644
>> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
>> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
>> @@ -387,7 +387,7 @@ int intel_guc_ucode_load(struct drm_device *dev)
>>   {
>>       struct drm_i915_private *dev_priv = dev->dev_private;
>>       struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
>> -    int err = 0;
>> +    int retries, err = 0;
>>
>>       if (!i915.enable_guc_submission)
>>           return 0;
>> @@ -441,29 +441,26 @@ int intel_guc_ucode_load(struct drm_device *dev)
>>        * steppings also so this is extended as well.
>>        */
>>       /* WaEnableGuCBootHashCheckNotSet:skl,bxt */
>> -    err = guc_ucode_xfer(dev_priv);
>> -    if (err) {
>> -        int retries = 3;
>> -
>> -        DRM_ERROR("GuC fw load failed, err=%d, attempting reset and
>> retry\n", err);
>> -
>> -        while (retries--) {
>> -            err = i915_reset_guc(dev_priv);
>> -            if (err)
>> -                break;
>> -
>> -            err = guc_ucode_xfer(dev_priv);
>> -            if (!err) {
>> -                DRM_DEBUG_DRIVER("GuC fw reload succeeded after
>> reset\n");
>> -                break;
>> -            }
>> -            DRM_DEBUG_DRIVER("GuC fw reload retries left: %d\n",
>> retries);
>> -        }
>> -
>> +    for (retries = 3; ; ) {
>> +        /*
>> +         * Always reset the GuC just before (re)loading, so
>> +         * that the state and timing are fairly predictable
>> +         */
>> +        err = i915_reset_guc(dev_priv);
>>           if (err) {
>> -            DRM_ERROR("GuC fw reload attempt failed, ret=%d\n", err);
>> +            DRM_ERROR("GuC reset failed, err %d\n", err);
>>               goto fail;
>>           }
>> +
>> +        err = guc_ucode_xfer(dev_priv);
>> +        if (!err)
>> +            break;
>> +
>> +        if (--retries == 0)
>> +            goto fail;
>> +
>> +        DRM_INFO("GuC fw load failed, err %d; will reset and "
>> +            "retry %d more time(s)\n", err, retries);
>>       }
>>
>>       guc_fw->guc_fw_load_status = GUC_FIRMWARE_SUCCESS;
>> @@ -485,6 +482,7 @@ int intel_guc_ucode_load(struct drm_device *dev)
>>       return 0;
>>
>>   fail:
>> +    DRM_ERROR("GuC firmware load failed, err %d\n", err);
>>       if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING)
>>           guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL;
>>
>>
> looks good to me,
> Reviewed-by: Arun Siluvery <arun.siluvery@linux.intel.com>
>
> regards
> Arun
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h
index 94ceee5..80786d9 100644
--- a/drivers/gpu/drm/i915/i915_guc_reg.h
+++ b/drivers/gpu/drm/i915/i915_guc_reg.h
@@ -27,10 +27,12 @@ 
 /* Definitions of GuC H/W registers, bits, etc */
 
 #define GUC_STATUS			_MMIO(0xc000)
-#define   GS_MIA_IN_RESET		(1 << 0)
+#define   GS_RESET_SHIFT		0
+#define   GS_MIA_IN_RESET		  (0x01 << GS_RESET_SHIFT)
 #define   GS_BOOTROM_SHIFT		1
 #define   GS_BOOTROM_MASK		  (0x7F << GS_BOOTROM_SHIFT)
 #define   GS_BOOTROM_RSA_FAILED		  (0x50 << GS_BOOTROM_SHIFT)
+#define   GS_BOOTROM_JUMP_PASSED	  (0x76 << GS_BOOTROM_SHIFT)
 #define   GS_UKERNEL_SHIFT		8
 #define   GS_UKERNEL_MASK		  (0xFF << GS_UKERNEL_SHIFT)
 #define   GS_UKERNEL_LAPIC_DONE		  (0x30 << GS_UKERNEL_SHIFT)
@@ -38,7 +40,13 @@ 
 #define   GS_UKERNEL_READY		  (0xF0 << GS_UKERNEL_SHIFT)
 #define   GS_MIA_SHIFT			16
 #define   GS_MIA_MASK			  (0x07 << GS_MIA_SHIFT)
-#define   GS_MIA_CORE_STATE		  (1 << GS_MIA_SHIFT)
+#define   GS_MIA_CORE_STATE		  (0x01 << GS_MIA_SHIFT)
+#define   GS_MIA_HALT_REQUESTED		  (0x02 << GS_MIA_SHIFT)
+#define   GS_MIA_ISR_ENTRY		  (0x04 << GS_MIA_SHIFT)
+#define   GS_AUTH_STATUS_SHIFT		30
+#define   GS_AUTH_STATUS_MASK		  (0x03 << GS_AUTH_STATUS_SHIFT)
+#define   GS_AUTH_STATUS_BAD		  (0x01 << GS_AUTH_STATUS_SHIFT)
+#define   GS_AUTH_STATUS_GOOD		  (0x02 << GS_AUTH_STATUS_SHIFT)
 
 #define SOFT_SCRATCH(n)			_MMIO(0xc180 + (n) * 4)
 #define SOFT_SCRATCH_COUNT		16
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index a07c228..a875936 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -387,7 +387,7 @@  int intel_guc_ucode_load(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
-	int err = 0;
+	int retries, err = 0;
 
 	if (!i915.enable_guc_submission)
 		return 0;
@@ -441,29 +441,26 @@  int intel_guc_ucode_load(struct drm_device *dev)
 	 * steppings also so this is extended as well.
 	 */
 	/* WaEnableGuCBootHashCheckNotSet:skl,bxt */
-	err = guc_ucode_xfer(dev_priv);
-	if (err) {
-		int retries = 3;
-
-		DRM_ERROR("GuC fw load failed, err=%d, attempting reset and retry\n", err);
-
-		while (retries--) {
-			err = i915_reset_guc(dev_priv);
-			if (err)
-				break;
-
-			err = guc_ucode_xfer(dev_priv);
-			if (!err) {
-				DRM_DEBUG_DRIVER("GuC fw reload succeeded after reset\n");
-				break;
-			}
-			DRM_DEBUG_DRIVER("GuC fw reload retries left: %d\n", retries);
-		}
-
+	for (retries = 3; ; ) {
+		/*
+		 * Always reset the GuC just before (re)loading, so
+		 * that the state and timing are fairly predictable
+		 */
+		err = i915_reset_guc(dev_priv);
 		if (err) {
-			DRM_ERROR("GuC fw reload attempt failed, ret=%d\n", err);
+			DRM_ERROR("GuC reset failed, err %d\n", err);
 			goto fail;
 		}
+
+		err = guc_ucode_xfer(dev_priv);
+		if (!err)
+			break;
+
+		if (--retries == 0)
+			goto fail;
+
+		DRM_INFO("GuC fw load failed, err %d; will reset and "
+			"retry %d more time(s)\n", err, retries);
 	}
 
 	guc_fw->guc_fw_load_status = GUC_FIRMWARE_SUCCESS;
@@ -485,6 +482,7 @@  int intel_guc_ucode_load(struct drm_device *dev)
 	return 0;
 
 fail:
+	DRM_ERROR("GuC firmware load failed, err %d\n", err);
 	if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING)
 		guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL;