diff mbox

[2/3] drm/i915/guc: disable GuC submission earlier during GuC (re)load

Message ID 1465287291-2187-2-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon June 7, 2016, 8:14 a.m. UTC
When resetting and reloading the GuC, the GuC submission management code
also needs to destroy and recreate the GuC client(s). Currently this is
done by a separate call from the GuC loader, but really, it's just an
internal detail of the submission code. So here we remove the call from
the loader (which is too late, really, because the GuC has already been
reloaded at this point) and put it into i915_guc_submission_init() instead.
This means that any preexisting client is destroyed *before* the GuC
(re)load and then recreated after, iff the firmware was successfully
loaded. If the GuC reload fails, we don't recreate the client, so
fallback to execlists mode (if active) won't leak the client object
(previously, the now-unusable client would have been left allocated,
and leaked if the driver were unloaded).

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 15 ++++++++++-----
 drivers/gpu/drm/i915/intel_guc_loader.c    |  3 ---
 2 files changed, 10 insertions(+), 8 deletions(-)

Comments

Tvrtko Ursulin June 7, 2016, 9:51 a.m. UTC | #1
On 07/06/16 09:14, Dave Gordon wrote:
> When resetting and reloading the GuC, the GuC submission management code
> also needs to destroy and recreate the GuC client(s). Currently this is
> done by a separate call from the GuC loader, but really, it's just an
> internal detail of the submission code. So here we remove the call from
> the loader (which is too late, really, because the GuC has already been
> reloaded at this point) and put it into i915_guc_submission_init() instead.
> This means that any preexisting client is destroyed *before* the GuC
> (re)load and then recreated after, iff the firmware was successfully
> loaded. If the GuC reload fails, we don't recreate the client, so
> fallback to execlists mode (if active) won't leak the client object
> (previously, the now-unusable client would have been left allocated,
> and leaked if the driver were unloaded).
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_guc_submission.c | 15 ++++++++++-----
>   drivers/gpu/drm/i915/intel_guc_loader.c    |  3 ---
>   2 files changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index ac72451..2db1182 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -657,6 +657,8 @@ static void guc_client_free(struct drm_device *dev,
>   	 */
>
>   	if (client->client_base) {
> +		uint16_t db_id = client->doorbell_id;
> +
>   		/*
>   		 * If we got as far as setting up a doorbell, make sure
>   		 * we shut it down before unmapping & deallocating the
> @@ -664,10 +666,11 @@ static void guc_client_free(struct drm_device *dev,
>   		 * GuC that we've finished with it, finally deallocate
>   		 * it in our bitmap
>   		 */
> -		if (client->doorbell_id != GUC_INVALID_DOORBELL_ID) {
> +		if (db_id != GUC_INVALID_DOORBELL_ID) {
>   			guc_disable_doorbell(guc, client);
> -			host2guc_release_doorbell(guc, client);
> -			release_doorbell(guc, client->doorbell_id);
> +			if (test_bit(db_id, guc->doorbell_bitmap))
> +				host2guc_release_doorbell(guc, client);
> +			release_doorbell(guc, db_id);
>   		}
>
>   		kunmap(kmap_to_page(client->client_base));
> @@ -912,6 +915,10 @@ int i915_guc_submission_init(struct drm_device *dev)
>   	const size_t gemsize = round_up(poolsize, PAGE_SIZE);
>   	struct intel_guc *guc = &dev_priv->guc;
>
> +	/* Wipe bitmap & delete client in case of reinitialisation */
> +	bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS);
> +	i915_guc_submission_disable(dev);
> +
>   	if (!i915.enable_guc_submission)
>   		return 0; /* not enabled  */
>
> @@ -923,9 +930,7 @@ int i915_guc_submission_init(struct drm_device *dev)
>   		return -ENOMEM;
>
>   	ida_init(&guc->ctx_ids);
> -
>   	guc_create_log(guc);
> -
>   	guc_create_ads(guc);
>
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
> index 4e34c2e..41f7c7d 100644
> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
> @@ -492,9 +492,6 @@ int intel_guc_setup(struct drm_device *dev)
>   		intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
>
>   	if (i915.enable_guc_submission) {
> -		/* The execbuf_client will be recreated. Release it first. */
> -		i915_guc_submission_disable(dev);
> -
>   		err = i915_guc_submission_enable(dev);
>   		if (err)
>   			goto fail;
>

This fixes the errors on suspend/resume? It would be useful for the 
commit message to explain what was happening.

It is a bit strange since the first disable now comes before the init, 
but since the disable path already does handle that case I suppose it is OK.

Anyway,

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Dave Gordon June 7, 2016, 10:13 a.m. UTC | #2
On 07/06/16 10:51, Tvrtko Ursulin wrote:
>
> On 07/06/16 09:14, Dave Gordon wrote:
>> When resetting and reloading the GuC, the GuC submission management code
>> also needs to destroy and recreate the GuC client(s). Currently this is
>> done by a separate call from the GuC loader, but really, it's just an
>> internal detail of the submission code. So here we remove the call from
>> the loader (which is too late, really, because the GuC has already been
>> reloaded at this point) and put it into i915_guc_submission_init() instead.
>> This means that any preexisting client is destroyed *before* the GuC
>> (re)load and then recreated after, iff the firmware was successfully
>> loaded. If the GuC reload fails, we don't recreate the client, so
>> fallback to execlists mode (if active) won't leak the client object
>> (previously, the now-unusable client would have been left allocated,
>> and leaked if the driver were unloaded).
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 15 ++++++++++-----
>>   drivers/gpu/drm/i915/intel_guc_loader.c    |  3 ---
>>   2 files changed, 10 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index ac72451..2db1182 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -657,6 +657,8 @@ static void guc_client_free(struct drm_device *dev,
>>        */
>>
>>       if (client->client_base) {
>> +        uint16_t db_id = client->doorbell_id;
>> +
>>           /*
>>            * If we got as far as setting up a doorbell, make sure
>>            * we shut it down before unmapping & deallocating the
>> @@ -664,10 +666,11 @@ static void guc_client_free(struct drm_device *dev,
>>            * GuC that we've finished with it, finally deallocate
>>            * it in our bitmap
>>            */
>> -        if (client->doorbell_id != GUC_INVALID_DOORBELL_ID) {
>> +        if (db_id != GUC_INVALID_DOORBELL_ID) {
>>               guc_disable_doorbell(guc, client);
>> -            host2guc_release_doorbell(guc, client);
>> -            release_doorbell(guc, client->doorbell_id);
>> +            if (test_bit(db_id, guc->doorbell_bitmap))
>> +                host2guc_release_doorbell(guc, client);
>> +            release_doorbell(guc, db_id);
>>           }
>>
>>           kunmap(kmap_to_page(client->client_base));
>> @@ -912,6 +915,10 @@ int i915_guc_submission_init(struct drm_device *dev)
>>       const size_t gemsize = round_up(poolsize, PAGE_SIZE);
>>       struct intel_guc *guc = &dev_priv->guc;
>>
>> +    /* Wipe bitmap & delete client in case of reinitialisation */
>> +    bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS);
>> +    i915_guc_submission_disable(dev);
>> +
>>       if (!i915.enable_guc_submission)
>>           return 0; /* not enabled  */
>>
>> @@ -923,9 +930,7 @@ int i915_guc_submission_init(struct drm_device *dev)
>>           return -ENOMEM;
>>
>>       ida_init(&guc->ctx_ids);
>> -
>>       guc_create_log(guc);
>> -
>>       guc_create_ads(guc);
>>
>>       return 0;
>> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c
>> b/drivers/gpu/drm/i915/intel_guc_loader.c
>> index 4e34c2e..41f7c7d 100644
>> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
>> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
>> @@ -492,9 +492,6 @@ int intel_guc_setup(struct drm_device *dev)
>>           intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
>>
>>       if (i915.enable_guc_submission) {
>> -        /* The execbuf_client will be recreated. Release it first. */
>> -        i915_guc_submission_disable(dev);
>> -
>>           err = i915_guc_submission_enable(dev);
>>           if (err)
>>               goto fail;
>
> This fixes the errors on suspend/resume? It would be useful for the
> commit message to explain what was happening.

The error message I've seen was actually a timeout from trying to talk 
to the GuC during a TDR reset. So this fixes it by *not* talking to the 
GuC during a reset - even if the GuC were still working, there's not 
much point in updating it just before the reload, which will reset all 
its state anyway.

> It is a bit strange since the first disable now comes before the init,

The first disable is now *inside* i915_guc_submission_init().
But it does come before the first i915_guc_submission_enable().
It's not an error to disable something that's not (yet) enabled :)

.Dave.

> but since the disable path already does handle that case I suppose it is
> OK.
>
> Anyway,
>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Regards,
>
> Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index ac72451..2db1182 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -657,6 +657,8 @@  static void guc_client_free(struct drm_device *dev,
 	 */
 
 	if (client->client_base) {
+		uint16_t db_id = client->doorbell_id;
+
 		/*
 		 * If we got as far as setting up a doorbell, make sure
 		 * we shut it down before unmapping & deallocating the
@@ -664,10 +666,11 @@  static void guc_client_free(struct drm_device *dev,
 		 * GuC that we've finished with it, finally deallocate
 		 * it in our bitmap
 		 */
-		if (client->doorbell_id != GUC_INVALID_DOORBELL_ID) {
+		if (db_id != GUC_INVALID_DOORBELL_ID) {
 			guc_disable_doorbell(guc, client);
-			host2guc_release_doorbell(guc, client);
-			release_doorbell(guc, client->doorbell_id);
+			if (test_bit(db_id, guc->doorbell_bitmap))
+				host2guc_release_doorbell(guc, client);
+			release_doorbell(guc, db_id);
 		}
 
 		kunmap(kmap_to_page(client->client_base));
@@ -912,6 +915,10 @@  int i915_guc_submission_init(struct drm_device *dev)
 	const size_t gemsize = round_up(poolsize, PAGE_SIZE);
 	struct intel_guc *guc = &dev_priv->guc;
 
+	/* Wipe bitmap & delete client in case of reinitialisation */
+	bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS);
+	i915_guc_submission_disable(dev);
+
 	if (!i915.enable_guc_submission)
 		return 0; /* not enabled  */
 
@@ -923,9 +930,7 @@  int i915_guc_submission_init(struct drm_device *dev)
 		return -ENOMEM;
 
 	ida_init(&guc->ctx_ids);
-
 	guc_create_log(guc);
-
 	guc_create_ads(guc);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 4e34c2e..41f7c7d 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -492,9 +492,6 @@  int intel_guc_setup(struct drm_device *dev)
 		intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
 
 	if (i915.enable_guc_submission) {
-		/* The execbuf_client will be recreated. Release it first. */
-		i915_guc_submission_disable(dev);
-
 		err = i915_guc_submission_enable(dev);
 		if (err)
 			goto fail;