diff mbox series

[v2,2/3] drm/panthor: Report group as timedout when we fail to properly suspend

Message ID 20241028114815.3793855-3-boris.brezillon@collabora.com (mailing list archive)
State New, archived
Headers show
Series drm/panthor: Fix group state reporting | expand

Commit Message

Boris Brezillon Oct. 28, 2024, 11:48 a.m. UTC
If we don't flag the group as timed out when it fails to suspend, it is
still considered usable by userspace, but all further GROUP_SUBMIT calls
will fail with -EINVAL.

Changes in v2:
- New patch

Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block")
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 drivers/gpu/drm/panthor/panthor_sched.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

Comments

Steven Price Oct. 28, 2024, 3:31 p.m. UTC | #1
On 28/10/2024 11:48, Boris Brezillon wrote:
> If we don't do that, the group is considered usable by userspace, but
> all further GROUP_SUBMIT will fail with -EINVAL.
> 
> Changes in v2:
> - New patch
> 
> Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block")
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>

I guess a failure to suspend is close enough to a timeout. I think the
only case that might actually matter is dealt with by your next patch.

Reviewed-by: Steven Price <steven.price@arm.com>

> ---
>  drivers/gpu/drm/panthor/panthor_sched.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index eda8fbb276b3..ef4bec7ff9c7 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -602,10 +602,11 @@ struct panthor_group {
>  	 * @timedout: True when a timeout occurred on any of the queues owned by
>  	 * this group.
>  	 *
> -	 * Timeouts can be reported by drm_sched or by the FW. In any case, any
> -	 * timeout situation is unrecoverable, and the group becomes useless.
> -	 * We simply wait for all references to be dropped so we can release the
> -	 * group object.
> +	 * Timeouts can be reported by drm_sched or by the FW. If a reset is required,
> +	 * and the group can't be suspended, this also leads to a timeout. In any case,
> +	 * any timeout situation is unrecoverable, and the group becomes useless. We
> +	 * simply wait for all references to be dropped so we can release the group
> +	 * object.
>  	 */
>  	bool timedout;
>  
> @@ -2687,6 +2688,12 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
>  		csgs_upd_ctx_init(&upd_ctx);
>  		while (slot_mask) {
>  			u32 csg_id = ffs(slot_mask) - 1;
> +			struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
> +
> +			/* We consider group suspension failures as fatal and flag the
> +			 * group as unusable by setting timedout=true.
> +			 */
> +			csg_slot->group->timedout = true;
>  
>  			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
>  						CSG_STATE_TERMINATE,
Liviu Dudau Oct. 29, 2024, 2:11 p.m. UTC | #2
On Mon, Oct 28, 2024 at 12:48:14PM +0100, Boris Brezillon wrote:
> If we don't do that, the group is considered usable by userspace, but
> all further GROUP_SUBMIT will fail with -EINVAL.
> 
> Changes in v2:
> - New patch
> 
> Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block")
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>

Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>

Best regards,
Liviu

> ---
>  drivers/gpu/drm/panthor/panthor_sched.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index eda8fbb276b3..ef4bec7ff9c7 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -602,10 +602,11 @@ struct panthor_group {
>  	 * @timedout: True when a timeout occurred on any of the queues owned by
>  	 * this group.
>  	 *
> -	 * Timeouts can be reported by drm_sched or by the FW. In any case, any
> -	 * timeout situation is unrecoverable, and the group becomes useless.
> -	 * We simply wait for all references to be dropped so we can release the
> -	 * group object.
> +	 * Timeouts can be reported by drm_sched or by the FW. If a reset is required,
> +	 * and the group can't be suspended, this also leads to a timeout. In any case,
> +	 * any timeout situation is unrecoverable, and the group becomes useless. We
> +	 * simply wait for all references to be dropped so we can release the group
> +	 * object.
>  	 */
>  	bool timedout;
>  
> @@ -2687,6 +2688,12 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
>  		csgs_upd_ctx_init(&upd_ctx);
>  		while (slot_mask) {
>  			u32 csg_id = ffs(slot_mask) - 1;
> +			struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
> +
> +			/* We consider group suspension failures as fatal and flag the
> +			 * group as unusable by setting timedout=true.
> +			 */
> +			csg_slot->group->timedout = true;
>  
>  			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
>  						CSG_STATE_TERMINATE,
> -- 
> 2.46.2
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index eda8fbb276b3..ef4bec7ff9c7 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -602,10 +602,11 @@  struct panthor_group {
 	 * @timedout: True when a timeout occurred on any of the queues owned by
 	 * this group.
 	 *
-	 * Timeouts can be reported by drm_sched or by the FW. In any case, any
-	 * timeout situation is unrecoverable, and the group becomes useless.
-	 * We simply wait for all references to be dropped so we can release the
-	 * group object.
+	 * Timeouts can be reported by drm_sched or by the FW. If a reset is required,
+	 * and the group can't be suspended, this also leads to a timeout. In any case,
+	 * any timeout situation is unrecoverable, and the group becomes useless. We
+	 * simply wait for all references to be dropped so we can release the group
+	 * object.
 	 */
 	bool timedout;
 
@@ -2687,6 +2688,12 @@  void panthor_sched_suspend(struct panthor_device *ptdev)
 		csgs_upd_ctx_init(&upd_ctx);
 		while (slot_mask) {
 			u32 csg_id = ffs(slot_mask) - 1;
+			struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
+
+			/* We consider group suspension failures as fatal and flag the
+			 * group as unusable by setting timedout=true.
+			 */
+			csg_slot->group->timedout = true;
 
 			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
 						CSG_STATE_TERMINATE,