diff mbox series

[V6,3/5] drm/amdgpu: use drm_file_err in fence timeouts

Message ID 20250417123155.4002358-3-sunil.khatri@amd.com (mailing list archive)
State New
Headers show
Series [V6,1/5] drm: add drm_file_err function to add process info | expand

Commit Message

Sunil Khatri April 17, 2025, 12:31 p.m. UTC
use drm_file_err instead of DRM_ERROR which adds
process and pid information in the userqueue error
logging.

Sample log:
[   42.444297] [drm:amdgpu_userqueue_wait_for_signal [amdgpu]] *ERROR* Timed out waiting for fence f=000000001c74d978 for comm:Xwayland pid:3427
[   42.444669] [drm:amdgpu_userqueue_suspend [amdgpu]] *ERROR* Not suspending userqueue, timeout waiting for comm:Xwayland pid:3427
[   42.824729] [drm:amdgpu_userqueue_wait_for_signal [amdgpu]] *ERROR* Timed out waiting for fence f=0000000074407d3e for comm:systemd-logind pid:1058
[   42.825082] [drm:amdgpu_userqueue_suspend [amdgpu]] *ERROR* Not suspending userqueue, timeout waiting for comm:systemd-logind pid:1058

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

Comments

Tvrtko Ursulin April 17, 2025, 1:41 p.m. UTC | #1
On 17/04/2025 13:31, Sunil Khatri wrote:
> use drm_file_err instead of DRM_ERROR which adds
> process and pid information in the userqueue error
> logging.
> 
> Sample log:
> [   42.444297] [drm:amdgpu_userqueue_wait_for_signal [amdgpu]] *ERROR* Timed out waiting for fence f=000000001c74d978 for comm:Xwayland pid:3427
> [   42.444669] [drm:amdgpu_userqueue_suspend [amdgpu]] *ERROR* Not suspending userqueue, timeout waiting for comm:Xwayland pid:3427
> [   42.824729] [drm:amdgpu_userqueue_wait_for_signal [amdgpu]] *ERROR* Timed out waiting for fence f=0000000074407d3e for comm:systemd-logind pid:1058
> [   42.825082] [drm:amdgpu_userqueue_suspend [amdgpu]] *ERROR* Not suspending userqueue, timeout waiting for comm:systemd-logind pid:1058

If you have some oomph left after this many revisions in a short time it 
would be good to update the examples to latest format.

> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 21 ++++++++++++-------
>   1 file changed, 13 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> index cd9dc0018ee6..613a3a63301b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> @@ -43,8 +43,9 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
>   	if (f && !dma_fence_is_signaled(f)) {
>   		ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
>   		if (ret <= 0) {
> -			DRM_ERROR("Timed out waiting for fence=%llu:%llu\n",
> -				  f->context, f->seqno);
> +			drm_file_err(uq_mgr->file,
> +				     "Timed out waiting for fence: context=%llu seqno:%llu\n",
> +				     f->context, f->seqno);

I would just go "...fence=%llu:%llu" for consistency with tracepoints 
but up to you.

>   			return;
>   		}
>   	}
> @@ -456,7 +457,8 @@ amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr)
>   	}
>   
>   	if (ret)
> -		DRM_ERROR("Failed to map all the queues\n");
> +		drm_file_err(uq_mgr->file, "Failed to map all the queues\n");
> +
>   	return ret;
>   }
>   
> @@ -614,7 +616,8 @@ amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr)
>   	}
>   
>   	if (ret)
> -		DRM_ERROR("Couldn't unmap all the queues\n");
> +		drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n");
> +
>   	return ret;
>   }
>   
> @@ -631,8 +634,10 @@ amdgpu_userqueue_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
>   			continue;
>   		ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
>   		if (ret <= 0) {
> -			DRM_ERROR("Timed out waiting for fence=%llu:%llu\n",
> -				  f->context, f->seqno);
> +			drm_file_err(uq_mgr->file,
> +				     "Timed out waiting for fence: context=%llu seqno:%llu\n",
> +				     f->context, f->seqno);
> +
>   			return -ETIMEDOUT;
>   		}
>   	}
> @@ -651,13 +656,13 @@ amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr,
>   	/* Wait for any pending userqueue fence work to finish */
>   	ret = amdgpu_userqueue_wait_for_signal(uq_mgr);
>   	if (ret) {
> -		DRM_ERROR("Not suspending userqueue, timeout waiting for work\n");
> +		drm_file_err(uq_mgr->file, "Not suspending userqueue, timeout waiting\n");

...work?

Anyway, we can tidy it later.

With or without fence=%llu:%llu:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>

Regards,

Tvrtko

>   		return;
>   	}
>   
>   	ret = amdgpu_userqueue_suspend_all(uq_mgr);
>   	if (ret) {
> -		DRM_ERROR("Failed to evict userqueue\n");
> +		drm_file_err(uq_mgr->file, "Failed to evict userqueue\n");
>   		return;
>   	}
>
Khatri, Sunil April 17, 2025, 3:59 p.m. UTC | #2
On 4/17/2025 7:11 PM, Tvrtko Ursulin wrote:
>
> On 17/04/2025 13:31, Sunil Khatri wrote:
>> use drm_file_err instead of DRM_ERROR which adds
>> process and pid information in the userqueue error
>> logging.
>>
>> Sample log:
>> [   42.444297] [drm:amdgpu_userqueue_wait_for_signal [amdgpu]] 
>> *ERROR* Timed out waiting for fence f=000000001c74d978 for 
>> comm:Xwayland pid:3427
>> [   42.444669] [drm:amdgpu_userqueue_suspend [amdgpu]] *ERROR* Not 
>> suspending userqueue, timeout waiting for comm:Xwayland pid:3427
>> [   42.824729] [drm:amdgpu_userqueue_wait_for_signal [amdgpu]] 
>> *ERROR* Timed out waiting for fence f=0000000074407d3e for 
>> comm:systemd-logind pid:1058
>> [   42.825082] [drm:amdgpu_userqueue_suspend [amdgpu]] *ERROR* Not 
>> suspending userqueue, timeout waiting for comm:systemd-logind pid:1058
>
> If you have some oomph left after this many revisions in a short time 
> it would be good to update the examples to latest format.

Will push the changes in new patch set

regards

Sunil

>> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 21 ++++++++++++-------
>>   1 file changed, 13 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> index cd9dc0018ee6..613a3a63301b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> @@ -43,8 +43,9 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr 
>> *uq_mgr,
>>       if (f && !dma_fence_is_signaled(f)) {
>>           ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
>>           if (ret <= 0) {
>> -            DRM_ERROR("Timed out waiting for fence=%llu:%llu\n",
>> -                  f->context, f->seqno);
>> +            drm_file_err(uq_mgr->file,
>> +                     "Timed out waiting for fence: context=%llu 
>> seqno:%llu\n",
>> +                     f->context, f->seqno);
>
> I would just go "...fence=%llu:%llu" for consistency with tracepoints 
> but up to you.
>
>>               return;
>>           }
>>       }
>> @@ -456,7 +457,8 @@ amdgpu_userqueue_resume_all(struct 
>> amdgpu_userq_mgr *uq_mgr)
>>       }
>>         if (ret)
>> -        DRM_ERROR("Failed to map all the queues\n");
>> +        drm_file_err(uq_mgr->file, "Failed to map all the queues\n");
>> +
>>       return ret;
>>   }
>>   @@ -614,7 +616,8 @@ amdgpu_userqueue_suspend_all(struct 
>> amdgpu_userq_mgr *uq_mgr)
>>       }
>>         if (ret)
>> -        DRM_ERROR("Couldn't unmap all the queues\n");
>> +        drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n");
>> +
>>       return ret;
>>   }
>>   @@ -631,8 +634,10 @@ amdgpu_userqueue_wait_for_signal(struct 
>> amdgpu_userq_mgr *uq_mgr)
>>               continue;
>>           ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
>>           if (ret <= 0) {
>> -            DRM_ERROR("Timed out waiting for fence=%llu:%llu\n",
>> -                  f->context, f->seqno);
>> +            drm_file_err(uq_mgr->file,
>> +                     "Timed out waiting for fence: context=%llu 
>> seqno:%llu\n",
>> +                     f->context, f->seqno);
>> +
>>               return -ETIMEDOUT;
>>           }
>>       }
>> @@ -651,13 +656,13 @@ amdgpu_userqueue_suspend(struct 
>> amdgpu_userq_mgr *uq_mgr,
>>       /* Wait for any pending userqueue fence work to finish */
>>       ret = amdgpu_userqueue_wait_for_signal(uq_mgr);
>>       if (ret) {
>> -        DRM_ERROR("Not suspending userqueue, timeout waiting for 
>> work\n");
>> +        drm_file_err(uq_mgr->file, "Not suspending userqueue, 
>> timeout waiting\n");
>
> ...work?
>
> Anyway, we can tidy it later.
>
> With or without fence=%llu:%llu:
>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>
> Regards,
>
> Tvrtko
>
>>           return;
>>       }
>>         ret = amdgpu_userqueue_suspend_all(uq_mgr);
>>       if (ret) {
>> -        DRM_ERROR("Failed to evict userqueue\n");
>> +        drm_file_err(uq_mgr->file, "Failed to evict userqueue\n");
>>           return;
>>       }
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index cd9dc0018ee6..613a3a63301b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -43,8 +43,9 @@  amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
 	if (f && !dma_fence_is_signaled(f)) {
 		ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
 		if (ret <= 0) {
-			DRM_ERROR("Timed out waiting for fence=%llu:%llu\n",
-				  f->context, f->seqno);
+			drm_file_err(uq_mgr->file,
+				     "Timed out waiting for fence: context=%llu seqno:%llu\n",
+				     f->context, f->seqno);
 			return;
 		}
 	}
@@ -456,7 +457,8 @@  amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr)
 	}
 
 	if (ret)
-		DRM_ERROR("Failed to map all the queues\n");
+		drm_file_err(uq_mgr->file, "Failed to map all the queues\n");
+
 	return ret;
 }
 
@@ -614,7 +616,8 @@  amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr)
 	}
 
 	if (ret)
-		DRM_ERROR("Couldn't unmap all the queues\n");
+		drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n");
+
 	return ret;
 }
 
@@ -631,8 +634,10 @@  amdgpu_userqueue_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
 			continue;
 		ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
 		if (ret <= 0) {
-			DRM_ERROR("Timed out waiting for fence=%llu:%llu\n",
-				  f->context, f->seqno);
+			drm_file_err(uq_mgr->file,
+				     "Timed out waiting for fence: context=%llu seqno:%llu\n",
+				     f->context, f->seqno);
+
 			return -ETIMEDOUT;
 		}
 	}
@@ -651,13 +656,13 @@  amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr,
 	/* Wait for any pending userqueue fence work to finish */
 	ret = amdgpu_userqueue_wait_for_signal(uq_mgr);
 	if (ret) {
-		DRM_ERROR("Not suspending userqueue, timeout waiting for work\n");
+		drm_file_err(uq_mgr->file, "Not suspending userqueue, timeout waiting\n");
 		return;
 	}
 
 	ret = amdgpu_userqueue_suspend_all(uq_mgr);
 	if (ret) {
-		DRM_ERROR("Failed to evict userqueue\n");
+		drm_file_err(uq_mgr->file, "Failed to evict userqueue\n");
 		return;
 	}