diff mbox series

[v13,03/13] virtio-gpu: Handle virtio_gpu_virgl_init() failure

Message ID 20240527030233.3775514-4-dmitry.osipenko@collabora.com (mailing list archive)
State New, archived
Headers show
Series Support blob memory and venus on qemu | expand

Commit Message

Dmitry Osipenko May 27, 2024, 3:02 a.m. UTC
virtio_gpu_virgl_init() may fail, leading to a further Qemu crash
because Qemu assumes it never fails. Check virtio_gpu_virgl_init()
return code and don't execute virtio commands on error. Failed
virtio_gpu_virgl_init() will result in a timed out virtio commands
for a guest OS.

Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
---
 hw/display/virtio-gpu-gl.c     | 29 +++++++++++++++++++++--------
 include/hw/virtio/virtio-gpu.h | 11 +++++++++--
 2 files changed, 30 insertions(+), 10 deletions(-)

Comments

Akihiko Odaki June 2, 2024, 5:34 a.m. UTC | #1
On 2024/05/27 12:02, Dmitry Osipenko wrote:
> virtio_gpu_virgl_init() may fail, leading to a further Qemu crash
> because Qemu assumes it never fails. Check virtio_gpu_virgl_init()
> return code and don't execute virtio commands on error. Failed
> virtio_gpu_virgl_init() will result in a timed out virtio commands
> for a guest OS.
> 
> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> ---
>   hw/display/virtio-gpu-gl.c     | 29 +++++++++++++++++++++--------
>   include/hw/virtio/virtio-gpu.h | 11 +++++++++--
>   2 files changed, 30 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
> index e06be60dfbfc..38a2b1bd3916 100644
> --- a/hw/display/virtio-gpu-gl.c
> +++ b/hw/display/virtio-gpu-gl.c
> @@ -29,9 +29,14 @@ static void virtio_gpu_gl_update_cursor_data(VirtIOGPU *g,
>                                                struct virtio_gpu_scanout *s,
>                                                uint32_t resource_id)
>   {
> +    VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
>       uint32_t width, height;
>       uint32_t pixels, *data;
>   
> +    if (gl->renderer_state != RS_INITED) {
> +        return;
> +    }
> +
>       data = virgl_renderer_get_cursor_data(resource_id, &width, &height);
>       if (!data) {
>           return;
> @@ -65,13 +70,21 @@ static void virtio_gpu_gl_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>           return;
>       }
>   
> -    if (!gl->renderer_inited) {
> -        virtio_gpu_virgl_init(g);
> -        gl->renderer_inited = true;
> -    }
> -    if (gl->renderer_reset) {
> -        gl->renderer_reset = false;
> +    switch (gl->renderer_state) {
> +    case RS_RESET:
>           virtio_gpu_virgl_reset(g);
> +        /* fallthrough */
> +    case RS_START:
> +        if (virtio_gpu_virgl_init(g)) {
> +            gl->renderer_state = RS_INIT_FAILED;
> +        } else {
> +            gl->renderer_state = RS_INITED;
> +        }
> +        break;
> +    case RS_INIT_FAILED:
> +        return;
> +    case RS_INITED:
> +        break;
>       }
>   
>       cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command));
> @@ -98,9 +111,9 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
>        * GL functions must be called with the associated GL context in main
>        * thread, and when the renderer is unblocked.
>        */
> -    if (gl->renderer_inited && !gl->renderer_reset) {
> +    if (gl->renderer_state == RS_INITED) {
>           virtio_gpu_virgl_reset_scanout(g);
> -        gl->renderer_reset = true;
> +        gl->renderer_state = RS_RESET;
>       }
>   }
>   
> diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
> index 7ff989a45a5c..6e71d799e5da 100644
> --- a/include/hw/virtio/virtio-gpu.h
> +++ b/include/hw/virtio/virtio-gpu.h
> @@ -224,11 +224,18 @@ struct VirtIOGPUClass {
>                                Error **errp);
>   };
>   
> +/* VirtIOGPUGL renderer states */
> +typedef enum {
> +    RS_START,       /* starting state */
> +    RS_INIT_FAILED, /* failed initialisation */

Is the distinction between RS_START and RS_INIT_FAILED really necessary?
Dmitry Osipenko June 3, 2024, 5:26 a.m. UTC | #2
On 6/2/24 08:34, Akihiko Odaki wrote:
>> +typedef enum {
>> +    RS_START,       /* starting state */
>> +    RS_INIT_FAILED, /* failed initialisation */
> 
> Is the distinction between RS_START and RS_INIT_FAILED really necessary?

The state stays in RS_INIT_FAILED once was failed until virtio-gpu is
reset, re-initializing virglrenderer isn't allowed in this state.

The RS_START state allows to initialize virglrenderer and moves to
either RS_INITED or RS_INIT_FAILED state after initialization.

The distinction is necessary
Akihiko Odaki June 3, 2024, 5:44 a.m. UTC | #3
On 2024/06/03 14:26, Dmitry Osipenko wrote:
> On 6/2/24 08:34, Akihiko Odaki wrote:
>>> +typedef enum {
>>> +    RS_START,       /* starting state */
>>> +    RS_INIT_FAILED, /* failed initialisation */
>>
>> Is the distinction between RS_START and RS_INIT_FAILED really necessary?
> 
> The state stays in RS_INIT_FAILED once was failed until virtio-gpu is
> reset, re-initializing virglrenderer isn't allowed in this state.

Can you elaborate more? Why isn't re-initializing allowed?
Marc-André Lureau June 4, 2024, 2:21 p.m. UTC | #4
Hi

On Mon, May 27, 2024 at 7:03 AM Dmitry Osipenko <
dmitry.osipenko@collabora.com> wrote:

> virtio_gpu_virgl_init() may fail, leading to a further Qemu crash
> because Qemu assumes it never fails. Check virtio_gpu_virgl_init()
> return code and don't execute virtio commands on error. Failed
> virtio_gpu_virgl_init() will result in a timed out virtio commands
> for a guest OS.
>
> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> ---
>  hw/display/virtio-gpu-gl.c     | 29 +++++++++++++++++++++--------
>  include/hw/virtio/virtio-gpu.h | 11 +++++++++--
>  2 files changed, 30 insertions(+), 10 deletions(-)
>
> diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
> index e06be60dfbfc..38a2b1bd3916 100644
> --- a/hw/display/virtio-gpu-gl.c
> +++ b/hw/display/virtio-gpu-gl.c
> @@ -29,9 +29,14 @@ static void virtio_gpu_gl_update_cursor_data(VirtIOGPU
> *g,
>                                               struct virtio_gpu_scanout *s,
>                                               uint32_t resource_id)
>  {
> +    VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
>      uint32_t width, height;
>      uint32_t pixels, *data;
>
> +    if (gl->renderer_state != RS_INITED) {
> +        return;
> +    }
> +
>      data = virgl_renderer_get_cursor_data(resource_id, &width, &height);
>      if (!data) {
>          return;
> @@ -65,13 +70,21 @@ static void virtio_gpu_gl_handle_ctrl(VirtIODevice
> *vdev, VirtQueue *vq)
>          return;
>      }
>
> -    if (!gl->renderer_inited) {
> -        virtio_gpu_virgl_init(g);
> -        gl->renderer_inited = true;
> -    }
> -    if (gl->renderer_reset) {
> -        gl->renderer_reset = false;
> +    switch (gl->renderer_state) {
> +    case RS_RESET:
>          virtio_gpu_virgl_reset(g);
> +        /* fallthrough */
> +    case RS_START:
> +        if (virtio_gpu_virgl_init(g)) {
> +            gl->renderer_state = RS_INIT_FAILED;
> +        } else {
> +            gl->renderer_state = RS_INITED;
> +        }
> +        break;
> +    case RS_INIT_FAILED:
> +        return;
> +    case RS_INITED:
> +        break;
>      }
>
>
This still lets it go through the cmd processing after setting
gl->renderer_state = RS_INIT_FAILED, the first time.


>      cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command));
> @@ -98,9 +111,9 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
>       * GL functions must be called with the associated GL context in main
>       * thread, and when the renderer is unblocked.
>       */
> -    if (gl->renderer_inited && !gl->renderer_reset) {
> +    if (gl->renderer_state == RS_INITED) {
>          virtio_gpu_virgl_reset_scanout(g);
> -        gl->renderer_reset = true;
> +        gl->renderer_state = RS_RESET;
>      }
>  }
>
> diff --git a/include/hw/virtio/virtio-gpu.h
> b/include/hw/virtio/virtio-gpu.h
> index 7ff989a45a5c..6e71d799e5da 100644
> --- a/include/hw/virtio/virtio-gpu.h
> +++ b/include/hw/virtio/virtio-gpu.h
> @@ -224,11 +224,18 @@ struct VirtIOGPUClass {
>                               Error **errp);
>  };
>
> +/* VirtIOGPUGL renderer states */
> +typedef enum {
> +    RS_START,       /* starting state */
> +    RS_INIT_FAILED, /* failed initialisation */
> +    RS_INITED,      /* initialised and working */
> +    RS_RESET,       /* inited and reset pending, moves to start after
> reset */
> +} RenderState;
> +
>  struct VirtIOGPUGL {
>      struct VirtIOGPU parent_obj;
>
> -    bool renderer_inited;
> -    bool renderer_reset;
> +    RenderState renderer_state;
>
>      QEMUTimer *fence_poll;
>      QEMUTimer *print_stats;
> --
> 2.44.0
>
>
Dmitry Osipenko June 9, 2024, 7:02 p.m. UTC | #5
On 6/3/24 08:44, Akihiko Odaki wrote:
> On 2024/06/03 14:26, Dmitry Osipenko wrote:
>> On 6/2/24 08:34, Akihiko Odaki wrote:
>>>> +typedef enum {
>>>> +    RS_START,       /* starting state */
>>>> +    RS_INIT_FAILED, /* failed initialisation */
>>>
>>> Is the distinction between RS_START and RS_INIT_FAILED really necessary?
>>
>> The state stays in RS_INIT_FAILED once was failed until virtio-gpu is
>> reset, re-initializing virglrenderer isn't allowed in this state.
> 
> Can you elaborate more? Why isn't re-initializing allowed?

In practice, if virglrenderer initialization failed once, it will fail
second time. Otherwise we will be retrying to init endlessly because
guest won't stop sending virgl commands even if they all are timing out.
Each initialization failure produces a error msg.
Dmitry Osipenko June 9, 2024, 7:05 p.m. UTC | #6
On 6/4/24 17:21, Marc-André Lureau wrote:
>> @@ -65,13 +70,21 @@ static void virtio_gpu_gl_handle_ctrl(VirtIODevice
>> *vdev, VirtQueue *vq)
>>          return;
>>      }
>>
>> -    if (!gl->renderer_inited) {
>> -        virtio_gpu_virgl_init(g);
>> -        gl->renderer_inited = true;
>> -    }
>> -    if (gl->renderer_reset) {
>> -        gl->renderer_reset = false;
>> +    switch (gl->renderer_state) {
>> +    case RS_RESET:
>>          virtio_gpu_virgl_reset(g);
>> +        /* fallthrough */
>> +    case RS_START:
>> +        if (virtio_gpu_virgl_init(g)) {
>> +            gl->renderer_state = RS_INIT_FAILED;
>> +        } else {
>> +            gl->renderer_state = RS_INITED;
>> +        }
>> +        break;
>> +    case RS_INIT_FAILED:
>> +        return;
>> +    case RS_INITED:
>> +        break;
>>      }
>>
>>
> This still lets it go through the cmd processing after setting
> gl->renderer_state = RS_INIT_FAILED, the first time.

Good catch, thanks!
Akihiko Odaki June 10, 2024, 3:38 a.m. UTC | #7
On 2024/06/10 4:02, Dmitry Osipenko wrote:
> On 6/3/24 08:44, Akihiko Odaki wrote:
>> On 2024/06/03 14:26, Dmitry Osipenko wrote:
>>> On 6/2/24 08:34, Akihiko Odaki wrote:
>>>>> +typedef enum {
>>>>> +    RS_START,       /* starting state */
>>>>> +    RS_INIT_FAILED, /* failed initialisation */
>>>>
>>>> Is the distinction between RS_START and RS_INIT_FAILED really necessary?
>>>
>>> The state stays in RS_INIT_FAILED once was failed until virtio-gpu is
>>> reset, re-initializing virglrenderer isn't allowed in this state.
>>
>> Can you elaborate more? Why isn't re-initializing allowed?
> 
> In practice, if virglrenderer initialization failed once, it will fail
> second time. Otherwise we will be retrying to init endlessly because
> guest won't stop sending virgl commands even if they all are timing out.
> Each initialization failure produces a error msg.
>
I see.

A better solution is to add a new function to GraphicHwOps to call back 
after initializating the displays and before starting the guest. You can 
do virgl initialization in such a function, and exit(1) if the 
initialization fails because the guest has not started yet, saving this 
enum. I don't require you to make such a change however; this is not a 
regression of your patches so you have no obligation to fix it.
Dmitry Osipenko June 15, 2024, 10:51 p.m. UTC | #8
On 6/10/24 06:38, Akihiko Odaki wrote:
> On 2024/06/10 4:02, Dmitry Osipenko wrote:
>> On 6/3/24 08:44, Akihiko Odaki wrote:
>>> On 2024/06/03 14:26, Dmitry Osipenko wrote:
>>>> On 6/2/24 08:34, Akihiko Odaki wrote:
>>>>>> +typedef enum {
>>>>>> +    RS_START,       /* starting state */
>>>>>> +    RS_INIT_FAILED, /* failed initialisation */
>>>>>
>>>>> Is the distinction between RS_START and RS_INIT_FAILED really
>>>>> necessary?
>>>>
>>>> The state stays in RS_INIT_FAILED once was failed until virtio-gpu is
>>>> reset, re-initializing virglrenderer isn't allowed in this state.
>>>
>>> Can you elaborate more? Why isn't re-initializing allowed?
>>
>> In practice, if virglrenderer initialization failed once, it will fail
>> second time. Otherwise we will be retrying to init endlessly because
>> guest won't stop sending virgl commands even if they all are timing out.
>> Each initialization failure produces a error msg.
>>
> I see.
> 
> A better solution is to add a new function to GraphicHwOps to call back
> after initializating the displays and before starting the guest. You can
> do virgl initialization in such a function, and exit(1) if the
> initialization fails because the guest has not started yet, saving this
> enum. I don't require you to make such a change however; this is not a
> regression of your patches so you have no obligation to fix it.

I'll keep this idea for a follow up patch, thanks! It will take me some
extra time to look through the display code, making sure that callback
is added properly and nothing is missed.
diff mbox series

Patch

diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
index e06be60dfbfc..38a2b1bd3916 100644
--- a/hw/display/virtio-gpu-gl.c
+++ b/hw/display/virtio-gpu-gl.c
@@ -29,9 +29,14 @@  static void virtio_gpu_gl_update_cursor_data(VirtIOGPU *g,
                                              struct virtio_gpu_scanout *s,
                                              uint32_t resource_id)
 {
+    VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
     uint32_t width, height;
     uint32_t pixels, *data;
 
+    if (gl->renderer_state != RS_INITED) {
+        return;
+    }
+
     data = virgl_renderer_get_cursor_data(resource_id, &width, &height);
     if (!data) {
         return;
@@ -65,13 +70,21 @@  static void virtio_gpu_gl_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
         return;
     }
 
-    if (!gl->renderer_inited) {
-        virtio_gpu_virgl_init(g);
-        gl->renderer_inited = true;
-    }
-    if (gl->renderer_reset) {
-        gl->renderer_reset = false;
+    switch (gl->renderer_state) {
+    case RS_RESET:
         virtio_gpu_virgl_reset(g);
+        /* fallthrough */
+    case RS_START:
+        if (virtio_gpu_virgl_init(g)) {
+            gl->renderer_state = RS_INIT_FAILED;
+        } else {
+            gl->renderer_state = RS_INITED;
+        }
+        break;
+    case RS_INIT_FAILED:
+        return;
+    case RS_INITED:
+        break;
     }
 
     cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command));
@@ -98,9 +111,9 @@  static void virtio_gpu_gl_reset(VirtIODevice *vdev)
      * GL functions must be called with the associated GL context in main
      * thread, and when the renderer is unblocked.
      */
-    if (gl->renderer_inited && !gl->renderer_reset) {
+    if (gl->renderer_state == RS_INITED) {
         virtio_gpu_virgl_reset_scanout(g);
-        gl->renderer_reset = true;
+        gl->renderer_state = RS_RESET;
     }
 }
 
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 7ff989a45a5c..6e71d799e5da 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -224,11 +224,18 @@  struct VirtIOGPUClass {
                              Error **errp);
 };
 
+/* VirtIOGPUGL renderer states */
+typedef enum {
+    RS_START,       /* starting state */
+    RS_INIT_FAILED, /* failed initialisation */
+    RS_INITED,      /* initialised and working */
+    RS_RESET,       /* inited and reset pending, moves to start after reset */
+} RenderState;
+
 struct VirtIOGPUGL {
     struct VirtIOGPU parent_obj;
 
-    bool renderer_inited;
-    bool renderer_reset;
+    RenderState renderer_state;
 
     QEMUTimer *fence_poll;
     QEMUTimer *print_stats;