diff mbox series

[v1,3/6] drm/lima: set bus_stop bit before hard reset

Message ID 20240117031212.1104034-4-nunes.erico@gmail.com (mailing list archive)
State New, archived
Headers show
Series drm/lima: fixes and improvements to error recovery | expand

Commit Message

Erico Nunes Jan. 17, 2024, 3:12 a.m. UTC
This is required for reliable hard resets. Otherwise, doing a hard reset
while a task is still running (such as a task which is being stopped by
the drm_sched timeout handler) may result in random mmu write timeouts
or lockups which cause the entire gpu to hang.

Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
---
 drivers/gpu/drm/lima/lima_pp.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

Comments

Vasily Khoruzhick Jan. 17, 2024, 6:23 p.m. UTC | #1
On Tue, Jan 16, 2024 at 7:12 PM Erico Nunes <nunes.erico@gmail.com> wrote:
>
> This is required for reliable hard resets. Otherwise, doing a hard reset
> while a task is still running (such as a task which is being stopped by
> the drm_sched timeout handler) may result in random mmu write timeouts
> or lockups which cause the entire gpu to hang.

It looks like Mali driver is doing the same, so it totally makes sense.

> Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Reviewed-by: Vasily Khoruzhick <anarsoul@gmail.com>

> ---
>  drivers/gpu/drm/lima/lima_pp.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
>
> diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c
> index a8f8f63b8295..ac097dd75072 100644
> --- a/drivers/gpu/drm/lima/lima_pp.c
> +++ b/drivers/gpu/drm/lima/lima_pp.c
> @@ -168,6 +168,11 @@ static void lima_pp_write_frame(struct lima_ip *ip, u32 *frame, u32 *wb)
>         }
>  }
>
> +static int lima_pp_bus_stop_poll(struct lima_ip *ip)
> +{
> +       return !!(pp_read(LIMA_PP_STATUS) & LIMA_PP_STATUS_BUS_STOPPED);
> +}
> +
>  static int lima_pp_hard_reset_poll(struct lima_ip *ip)
>  {
>         pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC01A0000);
> @@ -181,6 +186,14 @@ static int lima_pp_hard_reset(struct lima_ip *ip)
>
>         pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC0FFE000);
>         pp_write(LIMA_PP_INT_MASK, 0);
> +
> +       pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_STOP_BUS);
> +       ret = lima_poll_timeout(ip, lima_pp_bus_stop_poll, 10, 100);
> +       if (ret) {
> +               dev_err(dev->dev, "pp %s bus stop timeout\n", lima_ip_name(ip));
> +               return ret;
> +       }
> +
>         pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_FORCE_RESET);
>         ret = lima_poll_timeout(ip, lima_pp_hard_reset_poll, 10, 100);
>         if (ret) {
> --
> 2.43.0
>
Qiang Yu Jan. 18, 2024, 2:01 a.m. UTC | #2
Do we need same for GP?

Regards,
Qiang

On Wed, Jan 17, 2024 at 11:12 AM Erico Nunes <nunes.erico@gmail.com> wrote:
>
> This is required for reliable hard resets. Otherwise, doing a hard reset
> while a task is still running (such as a task which is being stopped by
> the drm_sched timeout handler) may result in random mmu write timeouts
> or lockups which cause the entire gpu to hang.
>
> Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
> ---
>  drivers/gpu/drm/lima/lima_pp.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
>
> diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c
> index a8f8f63b8295..ac097dd75072 100644
> --- a/drivers/gpu/drm/lima/lima_pp.c
> +++ b/drivers/gpu/drm/lima/lima_pp.c
> @@ -168,6 +168,11 @@ static void lima_pp_write_frame(struct lima_ip *ip, u32 *frame, u32 *wb)
>         }
>  }
>
> +static int lima_pp_bus_stop_poll(struct lima_ip *ip)
> +{
> +       return !!(pp_read(LIMA_PP_STATUS) & LIMA_PP_STATUS_BUS_STOPPED);
> +}
> +
>  static int lima_pp_hard_reset_poll(struct lima_ip *ip)
>  {
>         pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC01A0000);
> @@ -181,6 +186,14 @@ static int lima_pp_hard_reset(struct lima_ip *ip)
>
>         pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC0FFE000);
>         pp_write(LIMA_PP_INT_MASK, 0);
> +
> +       pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_STOP_BUS);
> +       ret = lima_poll_timeout(ip, lima_pp_bus_stop_poll, 10, 100);
> +       if (ret) {
> +               dev_err(dev->dev, "pp %s bus stop timeout\n", lima_ip_name(ip));
> +               return ret;
> +       }
> +
>         pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_FORCE_RESET);
>         ret = lima_poll_timeout(ip, lima_pp_hard_reset_poll, 10, 100);
>         if (ret) {
> --
> 2.43.0
>
Erico Nunes Jan. 18, 2024, 11:43 a.m. UTC | #3
On Thu, Jan 18, 2024 at 3:01 AM Qiang Yu <yuq825@gmail.com> wrote:
>
> Do we need same for GP?

I don't have an issue reproducer for gp so far, but the hardware does
have the same bit and the mali driver does it for both gp and pp, so I
think we can also add it to gp.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c
index a8f8f63b8295..ac097dd75072 100644
--- a/drivers/gpu/drm/lima/lima_pp.c
+++ b/drivers/gpu/drm/lima/lima_pp.c
@@ -168,6 +168,11 @@  static void lima_pp_write_frame(struct lima_ip *ip, u32 *frame, u32 *wb)
 	}
 }
 
+static int lima_pp_bus_stop_poll(struct lima_ip *ip)
+{
+	return !!(pp_read(LIMA_PP_STATUS) & LIMA_PP_STATUS_BUS_STOPPED);
+}
+
 static int lima_pp_hard_reset_poll(struct lima_ip *ip)
 {
 	pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC01A0000);
@@ -181,6 +186,14 @@  static int lima_pp_hard_reset(struct lima_ip *ip)
 
 	pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC0FFE000);
 	pp_write(LIMA_PP_INT_MASK, 0);
+
+	pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_STOP_BUS);
+	ret = lima_poll_timeout(ip, lima_pp_bus_stop_poll, 10, 100);
+	if (ret) {
+		dev_err(dev->dev, "pp %s bus stop timeout\n", lima_ip_name(ip));
+		return ret;
+	}
+
 	pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_FORCE_RESET);
 	ret = lima_poll_timeout(ip, lima_pp_hard_reset_poll, 10, 100);
 	if (ret) {