Message ID | 20241015043238.114034-6-dmitry.osipenko@collabora.com |
---|---|
State | New, archived |
Series | Support virtio-gpu DRM native context |
On 2024/10/15 13:32, Dmitry Osipenko wrote: > Support asynchronous fencing feature of virglrenderer. It allows Qemu to > handle fence as soon as it's signalled instead of periodically polling > the fence status. This feature is required for enabling DRM context > support in Qemu because legacy fencing mode isn't supported for DRM > contexts in virglrenderer. > > Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com> > --- > hw/display/virtio-gpu-gl.c | 3 + > hw/display/virtio-gpu-virgl.c | 134 ++++++++++++++++++++++++++++----- > include/hw/virtio/virtio-gpu.h | 14 ++++ > 3 files changed, 133 insertions(+), 18 deletions(-) > > diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c > index 7c0e448b4661..53d938f23f20 100644 > --- a/hw/display/virtio-gpu-gl.c > +++ b/hw/display/virtio-gpu-gl.c > @@ -170,6 +170,9 @@ static void virtio_gpu_gl_device_unrealize(DeviceState *qdev) > if (gl->renderer_state >= RS_INITED) { > #if VIRGL_VERSION_MAJOR >= 1 > qemu_bh_delete(gl->cmdq_resume_bh); > + > + virtio_gpu_virgl_reset_async_fences(g); > + qemu_bh_delete(gl->async_fence_bh); > #endif > if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { > timer_free(gl->print_stats); > diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c > index b32ce44ba2b1..ad6512987079 100644 > --- a/hw/display/virtio-gpu-virgl.c > +++ b/hw/display/virtio-gpu-virgl.c > @@ -891,6 +891,7 @@ static void virgl_cmd_set_scanout_blob(VirtIOGPU *g, > void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, > struct virtio_gpu_ctrl_command *cmd) > { > + VirtIOGPUGL *gl = VIRTIO_GPU_GL(g); > bool cmd_suspended = false; > int ret; > > @@ -992,35 +993,117 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, > > trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); > > - ret = virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, 0); > - if (ret) > - qemu_log_mask(LOG_GUEST_ERROR, > - "%s: virgl_renderer_create_fence error: %s", > - __func__, strerror(-ret)); > + if (gl->context_fence_enabled && > + (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX)) { > + uint32_t flags = 0; > + > + ret = virgl_renderer_context_create_fence(cmd->cmd_hdr.ctx_id, flags, > + cmd->cmd_hdr.ring_idx, > + cmd->cmd_hdr.fence_id); > + if (ret) > + qemu_log_mask(LOG_GUEST_ERROR, > + "%s: virgl_renderer_context_create_fence error: %s", > + __func__, strerror(ret)); > + } else { > + ret = virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, 0); > + if (ret) > + qemu_log_mask(LOG_GUEST_ERROR, > + "%s: virgl_renderer_create_fence error: %s", > + __func__, strerror(-ret)); > + } > } > > -static void virgl_write_fence(void *opaque, uint32_t fence) > +static void virtio_gpu_virgl_async_fence_bh(void *opaque) > { > - VirtIOGPU *g = opaque; > + struct virtio_gpu_virgl_context_fence *f, *f_tmp; > struct virtio_gpu_ctrl_command *cmd, *tmp; > + VirtIOGPU *g = opaque; > + VirtIOGPUGL *gl = VIRTIO_GPU_GL(g); > > - QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { > - /* > - * the guest can end up emitting fences out of order > - * so we should check all fenced cmds not just the first one. > - */ > - if (cmd->cmd_hdr.fence_id > fence) { > - continue; > + qemu_mutex_lock(&gl->async_fence_lock); > + > + QSLIST_FOREACH_SAFE(f, &gl->async_fenceq, next, f_tmp) { > + QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { > + /* > + * the guest can end up emitting fences out of order > + * so we should check all fenced cmds not just the first one. 
> + */ > + if (cmd->cmd_hdr.fence_id > f->fence_id) { > + continue; > + } > + if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) { > + if (cmd->cmd_hdr.ring_idx != f->ring_idx) { > + continue; > + } > + if (cmd->cmd_hdr.ctx_id != f->ctx_id) { > + continue; > + } > + } else if (f->ring_idx >= 0) { > + /* ctx0 GL-query fences don't have ring info */ > + continue; > + } > + virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); > + QTAILQ_REMOVE(&g->fenceq, cmd, next); > + g_free(cmd); > } > - trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id); > - virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); > - QTAILQ_REMOVE(&g->fenceq, cmd, next); > - g_free(cmd); > + > + trace_virtio_gpu_fence_resp(f->fence_id); > + QSLIST_REMOVE(&gl->async_fenceq, f, virtio_gpu_virgl_context_fence, > + next); > + g_free(f); > g->inflight--; > if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { > trace_virtio_gpu_dec_inflight_fences(g->inflight); > } > } > + > + qemu_mutex_unlock(&gl->async_fence_lock); > +} > + > +void virtio_gpu_virgl_reset_async_fences(VirtIOGPU *g) > +{ > + struct virtio_gpu_virgl_context_fence *f, *f_tmp; > + VirtIOGPUGL *gl = VIRTIO_GPU_GL(g); > + > + QSLIST_FOREACH_SAFE(f, &gl->async_fenceq, next, f_tmp) { > + QSLIST_REMOVE(&gl->async_fenceq, f, virtio_gpu_virgl_context_fence, > + next); > + g_free(f); > + } > +} > + > +static void > +virtio_gpu_virgl_push_async_fence(VirtIOGPU *g, uint32_t ctx_id, > + int64_t ring_idx, uint64_t fence_id) > +{ > + struct virtio_gpu_virgl_context_fence *f; > + VirtIOGPUGL *gl = VIRTIO_GPU_GL(g); > + > + f = g_new(struct virtio_gpu_virgl_context_fence, 1); > + f->ctx_id = ctx_id; > + f->ring_idx = ring_idx; > + f->fence_id = fence_id; > + > + qemu_mutex_lock(&gl->async_fence_lock); > + QSLIST_INSERT_HEAD(&gl->async_fenceq, f, next); > + qemu_mutex_unlock(&gl->async_fence_lock); > + > + qemu_bh_schedule(gl->async_fence_bh); > +} > + > +static void virgl_write_fence(void *opaque, uint32_t fence) > +{ > + VirtIOGPU *g = opaque; > + > + virtio_gpu_virgl_push_async_fence(g, 0, -1, fence); > +} > + > +static void virgl_write_context_fence(void *opaque, uint32_t ctx_id, > + uint32_t ring_idx, uint64_t fence) > +{ > + VirtIOGPU *g = opaque; What about taking the BQL here instead of having a QEMUBH? 
> + > + virtio_gpu_virgl_push_async_fence(g, ctx_id, ring_idx, fence); > } > > static virgl_renderer_gl_context > @@ -1110,6 +1193,8 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g) > dpy_gfx_replace_surface(g->parent_obj.scanout[i].con, NULL); > dpy_gl_scanout_disable(g->parent_obj.scanout[i].con); > } > + > + virtio_gpu_virgl_reset_async_fences(g); > } > > void virtio_gpu_virgl_reset(VirtIOGPU *g) > @@ -1127,6 +1212,12 @@ int virtio_gpu_virgl_init(VirtIOGPU *g) > if (qemu_egl_display) { > virtio_gpu_3d_cbs.version = 4; > virtio_gpu_3d_cbs.get_egl_display = virgl_get_egl_display; > +#if VIRGL_VERSION_MAJOR >= 1 > + virtio_gpu_3d_cbs.write_context_fence = virgl_write_context_fence; > + flags |= VIRGL_RENDERER_ASYNC_FENCE_CB; > + flags |= VIRGL_RENDERER_THREAD_SYNC; > + gl->context_fence_enabled = true; > +#endif > } > #endif > #ifdef VIRGL_RENDERER_D3D11_SHARE_TEXTURE > @@ -1160,6 +1251,13 @@ int virtio_gpu_virgl_init(VirtIOGPU *g) > gl->cmdq_resume_bh = aio_bh_new(qemu_get_aio_context(), > virtio_gpu_virgl_resume_cmdq_bh, > g); > + > + gl->async_fence_bh = aio_bh_new(qemu_get_aio_context(), > + virtio_gpu_virgl_async_fence_bh, > + g); > + > + qemu_mutex_init(&gl->async_fence_lock); > + QSLIST_INIT(&gl->async_fenceq); > #endif > > return 0; > diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h > index 230fa0c4ee0a..5673f0be85f4 100644 > --- a/include/hw/virtio/virtio-gpu.h > +++ b/include/hw/virtio/virtio-gpu.h > @@ -229,6 +229,13 @@ struct VirtIOGPUClass { > Error **errp); > }; > > +struct virtio_gpu_virgl_context_fence { > + uint32_t ctx_id; > + int64_t ring_idx; > + uint64_t fence_id; > + QSLIST_ENTRY(virtio_gpu_virgl_context_fence) next; > +}; > + > /* VirtIOGPUGL renderer states */ > typedef enum { > RS_START, /* starting state */ > @@ -246,6 +253,12 @@ struct VirtIOGPUGL { > QEMUTimer *print_stats; > > QEMUBH *cmdq_resume_bh; > + > + QEMUBH *async_fence_bh; > + QemuMutex async_fence_lock; > + QSLIST_HEAD(, virtio_gpu_virgl_context_fence) async_fenceq; > + > + bool context_fence_enabled; > }; > > struct VhostUserGPU { > @@ -360,5 +373,6 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g); > void virtio_gpu_virgl_reset(VirtIOGPU *g); > int virtio_gpu_virgl_init(VirtIOGPU *g); > GArray *virtio_gpu_virgl_get_capsets(VirtIOGPU *g); > +void virtio_gpu_virgl_reset_async_fences(VirtIOGPU *g); > > #endif
On 10/18/24 08:28, Akihiko Odaki wrote:
>> +static void virgl_write_context_fence(void *opaque, uint32_t ctx_id,
>> +                                      uint32_t ring_idx, uint64_t fence)
>> +{
>> +    VirtIOGPU *g = opaque;
>
> What about taking the BQL here instead of having a QEMUBH?

That will block the virglrenderer thread writing the fence, which in turn
might block other virglrenderer threads.
On 2024/10/19 6:31, Dmitry Osipenko wrote:
> On 10/18/24 08:28, Akihiko Odaki wrote:
>>> +static void virgl_write_context_fence(void *opaque, uint32_t ctx_id,
>>> +                                      uint32_t ring_idx, uint64_t fence)
>>> +{
>>> +    VirtIOGPU *g = opaque;
>>
>> What about taking the BQL here instead of having a QEMUBH?
>
> That will block the virglrenderer thread writing the fence, which in turn
> might block other virglrenderer threads.

Looking at virglrenderer's source code, the thread writing the fence is
the only thread it creates. Otherwise virglrenderer's code should be
executed only in the QEMU thread calling virglrenderer's functions,
which always holds the BQL. So taking the BQL here will not interfere
with another thread.
On 10/22/24 08:11, Akihiko Odaki wrote:
> On 2024/10/19 6:31, Dmitry Osipenko wrote:
>> On 10/18/24 08:28, Akihiko Odaki wrote:
>>>> +static void virgl_write_context_fence(void *opaque, uint32_t ctx_id,
>>>> +                                      uint32_t ring_idx, uint64_t fence)
>>>> +{
>>>> +    VirtIOGPU *g = opaque;
>>>
>>> What about taking the BQL here instead of having a QEMUBH?
>>
>> That will block the virglrenderer thread writing the fence, which in turn
>> might block other virglrenderer threads.
>
> Looking at virglrenderer's source code, the thread writing the fence is
> the only thread it creates. Otherwise virglrenderer's code should be
> executed only in the QEMU thread calling virglrenderer's functions,
> which always holds the BQL. So taking the BQL here will not interfere
> with another thread.

There are other problems with that BQL approach:

1. virgl_renderer_context_destroy() is executed from QEMU's main loop and
   will terminate virglrenderer's fence-sync threads, which at the same time
   will be taking the same BQL if a fence fires while the VM is shutting
   down; this condition will result in a deadlock.

2. In the case of vrend, the fence-sync thread uses a different GL context
   than QEMU's main-loop vrend thread. There are too many potential problems
   with GL context switching here; we shouldn't be able to jump into QEMU's
   GL code from a non-vrend GL thread.

Too many complications with the BQL approach for little benefit, IMO.
On 10/22/24 15:37, Dmitry Osipenko wrote:
> On 10/22/24 08:11, Akihiko Odaki wrote:
>> On 2024/10/19 6:31, Dmitry Osipenko wrote:
>>> On 10/18/24 08:28, Akihiko Odaki wrote:
>>>>> +static void virgl_write_context_fence(void *opaque, uint32_t ctx_id,
>>>>> +                                      uint32_t ring_idx, uint64_t fence)
>>>>> +{
>>>>> +    VirtIOGPU *g = opaque;
>>>>
>>>> What about taking the BQL here instead of having a QEMUBH?
>>>
>>> That will block the virglrenderer thread writing the fence, which in turn
>>> might block other virglrenderer threads.
>>
>> Looking at virglrenderer's source code, the thread writing the fence is
>> the only thread it creates. Otherwise virglrenderer's code should be
>> executed only in the QEMU thread calling virglrenderer's functions,
>> which always holds the BQL. So taking the BQL here will not interfere
>> with another thread.
>
> There are other problems with that BQL approach:
>
> 1. virgl_renderer_context_destroy() is executed from QEMU's main loop and
>    will terminate virglrenderer's fence-sync threads, which at the same
>    time will be taking the same BQL if a fence fires while the VM is
>    shutting down; this condition will result in a deadlock.

I mixed up virgl_renderer_context_destroy() with virgl_renderer_cleanup()
here, but actually virgl_renderer_context_destroy() will have the same
deadlock issue.
On 2024/10/22 21:44, Dmitry Osipenko wrote:
> On 10/22/24 15:37, Dmitry Osipenko wrote:
>> On 10/22/24 08:11, Akihiko Odaki wrote:
>>> On 2024/10/19 6:31, Dmitry Osipenko wrote:
>>>> On 10/18/24 08:28, Akihiko Odaki wrote:
>>>>>> +static void virgl_write_context_fence(void *opaque, uint32_t ctx_id,
>>>>>> +                                      uint32_t ring_idx, uint64_t fence)
>>>>>> +{
>>>>>> +    VirtIOGPU *g = opaque;
>>>>>
>>>>> What about taking the BQL here instead of having a QEMUBH?
>>>>
>>>> That will block the virglrenderer thread writing the fence, which in turn
>>>> might block other virglrenderer threads.
>>>
>>> Looking at virglrenderer's source code, the thread writing the fence is
>>> the only thread it creates. Otherwise virglrenderer's code should be
>>> executed only in the QEMU thread calling virglrenderer's functions,
>>> which always holds the BQL. So taking the BQL here will not interfere
>>> with another thread.
>>
>> There are other problems with that BQL approach:
>>
>> 1. virgl_renderer_context_destroy() is executed from QEMU's main loop and
>>    will terminate virglrenderer's fence-sync threads, which at the same
>>    time will be taking the same BQL if a fence fires while the VM is
>>    shutting down; this condition will result in a deadlock.
>
> I mixed up virgl_renderer_context_destroy() with virgl_renderer_cleanup()
> here, but actually virgl_renderer_context_destroy() will have the same
> deadlock issue.
>

Thanks for the explanation. This explains well why the QEMUBH is needed.

Regards,
Akihiko Odaki
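For readers following the thread, the deadlock described above is the classic
"join a thread while holding the lock that thread needs" pattern. Below is a
minimal standalone C sketch (purely illustrative, not QEMU or virglrenderer
code; the names bql and fence_thread are stand-ins) of why a fence callback
that takes the BQL directly could hang against a main loop that destroys the
context with the BQL held:

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t bql = PTHREAD_MUTEX_INITIALIZER; /* stand-in for the BQL */

    /* Stand-in for virglrenderer's fence-sync thread: a fence callback that
     * tries to take the BQL before completing fenced commands. */
    static void *fence_thread(void *arg)
    {
        sleep(1);                   /* a fence "fires" during shutdown */
        pthread_mutex_lock(&bql);   /* blocks: the main thread holds the lock */
        printf("fence delivered\n");
        pthread_mutex_unlock(&bql);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        pthread_create(&t, NULL, fence_thread, NULL);

        /* Stand-in for the main loop calling virgl_renderer_context_destroy()
         * with the BQL held: it waits for the fence thread to terminate while
         * that thread waits for the lock we hold -> deadlock. */
        pthread_mutex_lock(&bql);
        sleep(2);
        pthread_join(t, NULL);      /* never returns */
        pthread_mutex_unlock(&bql);
        return 0;
    }

Deferring the fence completion to a QEMUBH, as the patch does, avoids ever
taking the BQL from the virglrenderer thread.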
On 2024/10/15 13:32, Dmitry Osipenko wrote: > Support asynchronous fencing feature of virglrenderer. It allows Qemu to > handle fence as soon as it's signalled instead of periodically polling > the fence status. This feature is required for enabling DRM context > support in Qemu because legacy fencing mode isn't supported for DRM > contexts in virglrenderer. > > Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com> > --- > hw/display/virtio-gpu-gl.c | 3 + > hw/display/virtio-gpu-virgl.c | 134 ++++++++++++++++++++++++++++----- > include/hw/virtio/virtio-gpu.h | 14 ++++ > 3 files changed, 133 insertions(+), 18 deletions(-) > > diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c > index 7c0e448b4661..53d938f23f20 100644 > --- a/hw/display/virtio-gpu-gl.c > +++ b/hw/display/virtio-gpu-gl.c > @@ -170,6 +170,9 @@ static void virtio_gpu_gl_device_unrealize(DeviceState *qdev) > if (gl->renderer_state >= RS_INITED) { > #if VIRGL_VERSION_MAJOR >= 1 > qemu_bh_delete(gl->cmdq_resume_bh); > + > + virtio_gpu_virgl_reset_async_fences(g); > + qemu_bh_delete(gl->async_fence_bh); > #endif > if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { > timer_free(gl->print_stats); > diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c > index b32ce44ba2b1..ad6512987079 100644 > --- a/hw/display/virtio-gpu-virgl.c > +++ b/hw/display/virtio-gpu-virgl.c > @@ -891,6 +891,7 @@ static void virgl_cmd_set_scanout_blob(VirtIOGPU *g, > void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, > struct virtio_gpu_ctrl_command *cmd) > { > + VirtIOGPUGL *gl = VIRTIO_GPU_GL(g); > bool cmd_suspended = false; > int ret; > > @@ -992,35 +993,117 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, > > trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); > > - ret = virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, 0); > - if (ret) > - qemu_log_mask(LOG_GUEST_ERROR, > - "%s: virgl_renderer_create_fence error: %s", > - __func__, strerror(-ret)); > + if (gl->context_fence_enabled && > + (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX)) { > + uint32_t flags = 0; > + > + ret = virgl_renderer_context_create_fence(cmd->cmd_hdr.ctx_id, flags, > + cmd->cmd_hdr.ring_idx, > + cmd->cmd_hdr.fence_id); > + if (ret) > + qemu_log_mask(LOG_GUEST_ERROR, > + "%s: virgl_renderer_context_create_fence error: %s", > + __func__, strerror(ret)); This should use: strerror(-ret) > + } else { > + ret = virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, 0); > + if (ret) > + qemu_log_mask(LOG_GUEST_ERROR, > + "%s: virgl_renderer_create_fence error: %s", > + __func__, strerror(-ret)); > + } > } > > -static void virgl_write_fence(void *opaque, uint32_t fence) > +static void virtio_gpu_virgl_async_fence_bh(void *opaque) > { > - VirtIOGPU *g = opaque; > + struct virtio_gpu_virgl_context_fence *f, *f_tmp; > struct virtio_gpu_ctrl_command *cmd, *tmp; > + VirtIOGPU *g = opaque; > + VirtIOGPUGL *gl = VIRTIO_GPU_GL(g); > > - QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { > - /* > - * the guest can end up emitting fences out of order > - * so we should check all fenced cmds not just the first one. > - */ > - if (cmd->cmd_hdr.fence_id > fence) { > - continue; > + qemu_mutex_lock(&gl->async_fence_lock); > + > + QSLIST_FOREACH_SAFE(f, &gl->async_fenceq, next, f_tmp) { > + QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { > + /* > + * the guest can end up emitting fences out of order > + * so we should check all fenced cmds not just the first one. 
> + */ > + if (cmd->cmd_hdr.fence_id > f->fence_id) { > + continue; > + } > + if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) { > + if (cmd->cmd_hdr.ring_idx != f->ring_idx) { > + continue; > + } > + if (cmd->cmd_hdr.ctx_id != f->ctx_id) { > + continue; > + } > + } else if (f->ring_idx >= 0) { > + /* ctx0 GL-query fences don't have ring info */ > + continue; > + } > + virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); > + QTAILQ_REMOVE(&g->fenceq, cmd, next); > + g_free(cmd); > } > - trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id); > - virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); > - QTAILQ_REMOVE(&g->fenceq, cmd, next); > - g_free(cmd); > + > + trace_virtio_gpu_fence_resp(f->fence_id); > + QSLIST_REMOVE(&gl->async_fenceq, f, virtio_gpu_virgl_context_fence, > + next); Use QSLIST_FIRST() and QSLIST_REMOVE_HEAD() to show that we are only dealing with the head of the list to prove a singly-linked list is sufficient for our use case. > + g_free(f); > g->inflight--; > if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { > trace_virtio_gpu_dec_inflight_fences(g->inflight); > } > } > + > + qemu_mutex_unlock(&gl->async_fence_lock); > +} > + > +void virtio_gpu_virgl_reset_async_fences(VirtIOGPU *g) > +{ > + struct virtio_gpu_virgl_context_fence *f, *f_tmp; > + VirtIOGPUGL *gl = VIRTIO_GPU_GL(g); > + > + QSLIST_FOREACH_SAFE(f, &gl->async_fenceq, next, f_tmp) { > + QSLIST_REMOVE(&gl->async_fenceq, f, virtio_gpu_virgl_context_fence, > + next); > + g_free(f); > + } > +} > + > +static void > +virtio_gpu_virgl_push_async_fence(VirtIOGPU *g, uint32_t ctx_id, > + int64_t ring_idx, uint64_t fence_id) > +{ > + struct virtio_gpu_virgl_context_fence *f; > + VirtIOGPUGL *gl = VIRTIO_GPU_GL(g); > + > + f = g_new(struct virtio_gpu_virgl_context_fence, 1); > + f->ctx_id = ctx_id; > + f->ring_idx = ring_idx; > + f->fence_id = fence_id; > + > + qemu_mutex_lock(&gl->async_fence_lock); > + QSLIST_INSERT_HEAD(&gl->async_fenceq, f, next); > + qemu_mutex_unlock(&gl->async_fence_lock); > + > + qemu_bh_schedule(gl->async_fence_bh); > +} > + > +static void virgl_write_fence(void *opaque, uint32_t fence) > +{ > + VirtIOGPU *g = opaque; > + > + virtio_gpu_virgl_push_async_fence(g, 0, -1, fence); > +} > + > +static void virgl_write_context_fence(void *opaque, uint32_t ctx_id, > + uint32_t ring_idx, uint64_t fence) > +{ > + VirtIOGPU *g = opaque; > + > + virtio_gpu_virgl_push_async_fence(g, ctx_id, ring_idx, fence); > } > > static virgl_renderer_gl_context > @@ -1110,6 +1193,8 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g) > dpy_gfx_replace_surface(g->parent_obj.scanout[i].con, NULL); > dpy_gl_scanout_disable(g->parent_obj.scanout[i].con); > } > + > + virtio_gpu_virgl_reset_async_fences(g); > } > > void virtio_gpu_virgl_reset(VirtIOGPU *g) > @@ -1127,6 +1212,12 @@ int virtio_gpu_virgl_init(VirtIOGPU *g) > if (qemu_egl_display) { > virtio_gpu_3d_cbs.version = 4; > virtio_gpu_3d_cbs.get_egl_display = virgl_get_egl_display; > +#if VIRGL_VERSION_MAJOR >= 1 > + virtio_gpu_3d_cbs.write_context_fence = virgl_write_context_fence; > + flags |= VIRGL_RENDERER_ASYNC_FENCE_CB; > + flags |= VIRGL_RENDERER_THREAD_SYNC; > + gl->context_fence_enabled = true; > +#endif > } > #endif > #ifdef VIRGL_RENDERER_D3D11_SHARE_TEXTURE > @@ -1160,6 +1251,13 @@ int virtio_gpu_virgl_init(VirtIOGPU *g) > gl->cmdq_resume_bh = aio_bh_new(qemu_get_aio_context(), > virtio_gpu_virgl_resume_cmdq_bh, > g); > + > + gl->async_fence_bh = aio_bh_new(qemu_get_aio_context(), > + virtio_gpu_virgl_async_fence_bh, > + 
g); > + > + qemu_mutex_init(&gl->async_fence_lock); > + QSLIST_INIT(&gl->async_fenceq); This QSLIST_INIT() call is unnecessary. gl->async_fenceq should be already initialized with zero. > #endif > > return 0; > diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h > index 230fa0c4ee0a..5673f0be85f4 100644 > --- a/include/hw/virtio/virtio-gpu.h > +++ b/include/hw/virtio/virtio-gpu.h > @@ -229,6 +229,13 @@ struct VirtIOGPUClass { > Error **errp); > }; > > +struct virtio_gpu_virgl_context_fence { > + uint32_t ctx_id; > + int64_t ring_idx; > + uint64_t fence_id; > + QSLIST_ENTRY(virtio_gpu_virgl_context_fence) next; > +}; > + > /* VirtIOGPUGL renderer states */ > typedef enum { > RS_START, /* starting state */ > @@ -246,6 +253,12 @@ struct VirtIOGPUGL { > QEMUTimer *print_stats; > > QEMUBH *cmdq_resume_bh; > + > + QEMUBH *async_fence_bh; > + QemuMutex async_fence_lock; Use QSLIST_INSERT_HEAD_ATOMIC() and QSLIST_MOVE_ATOMIC() to avoid locking. Regards, Akihiko Odaki > + QSLIST_HEAD(, virtio_gpu_virgl_context_fence) async_fenceq; > + > + bool context_fence_enabled; > }; > > struct VhostUserGPU { > @@ -360,5 +373,6 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g); > void virtio_gpu_virgl_reset(VirtIOGPU *g); > int virtio_gpu_virgl_init(VirtIOGPU *g); > GArray *virtio_gpu_virgl_get_capsets(VirtIOGPU *g); > +void virtio_gpu_virgl_reset_async_fences(VirtIOGPU *g); > > #endif
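For reference, a rough sketch of how the two list-handling suggestions above
might combine. This is an untested illustration against QEMU's
include/qemu/queue.h macros and the struct/field names from this patch, not
the actual v3 code: the producer publishes fences with
QSLIST_INSERT_HEAD_ATOMIC(), and the BH drains them with QSLIST_MOVE_ATOMIC()
plus QSLIST_FIRST()/QSLIST_REMOVE_HEAD(), so async_fence_lock becomes
unnecessary. The command-matching loop is elided.

    /* Producer side, called from virglrenderer's fence thread (sketch). */
    static void
    virtio_gpu_virgl_push_async_fence(VirtIOGPU *g, uint32_t ctx_id,
                                      int64_t ring_idx, uint64_t fence_id)
    {
        VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);
        struct virtio_gpu_virgl_context_fence *f;

        f = g_new(struct virtio_gpu_virgl_context_fence, 1);
        f->ctx_id = ctx_id;
        f->ring_idx = ring_idx;
        f->fence_id = fence_id;

        /* Lock-free publish; pairs with QSLIST_MOVE_ATOMIC() in the BH. */
        QSLIST_INSERT_HEAD_ATOMIC(&gl->async_fenceq, f, next);

        qemu_bh_schedule(gl->async_fence_bh);
    }

    /* Consumer side, running in the main loop under the BQL (sketch). */
    static void virtio_gpu_virgl_async_fence_bh(void *opaque)
    {
        QSLIST_HEAD(, virtio_gpu_virgl_context_fence) async_fenceq;
        struct virtio_gpu_virgl_context_fence *f;
        VirtIOGPU *g = opaque;
        VirtIOGPUGL *gl = VIRTIO_GPU_GL(g);

        /* Atomically steal the whole list; the producer may keep appending. */
        QSLIST_MOVE_ATOMIC(&async_fenceq, &gl->async_fenceq);

        while (!QSLIST_EMPTY(&async_fenceq)) {
            f = QSLIST_FIRST(&async_fenceq);

            /* ... match and complete fenced commands on g->fenceq here ... */

            QSLIST_REMOVE_HEAD(&async_fenceq, next);
            g_free(f);
        }
    }

Working only on the head of the local, already-detached list shows why a
singly-linked list is sufficient here.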
On 10/23/24 13:04, Akihiko Odaki wrote:
...
>> +        ret = virgl_renderer_context_create_fence(cmd->cmd_hdr.ctx_id, flags,
>> +                                                  cmd->cmd_hdr.ring_idx,
>> +                                                  cmd->cmd_hdr.fence_id);
>> +        if (ret)
>> +            qemu_log_mask(LOG_GUEST_ERROR,
>> +                          "%s: virgl_renderer_context_create_fence error: %s",
>> +                          __func__, strerror(ret));
>
> This should use: strerror(-ret)

Indeed, here the error code should be negative, while for the legacy
virgl_renderer_create_fence() it's positive. Will correct it in v3, add a
clarifying comment and address the other comments.

Thanks for the review!
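A possible shape of that v3 correction (a sketch based on the review comment
and the reply above, not the actual follow-up patch), with the clarifying
comment about the differing error-code conventions:

    ret = virgl_renderer_context_create_fence(cmd->cmd_hdr.ctx_id, flags,
                                              cmd->cmd_hdr.ring_idx,
                                              cmd->cmd_hdr.fence_id);
    if (ret) {
        /*
         * Unlike the legacy virgl_renderer_create_fence(), which returns a
         * positive error code, virgl_renderer_context_create_fence() returns
         * a negative errno, hence the negation for strerror().
         */
        qemu_log_mask(LOG_GUEST_ERROR,
                      "%s: virgl_renderer_context_create_fence error: %s",
                      __func__, strerror(-ret));
    }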
diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c index 7c0e448b4661..53d938f23f20 100644 --- a/hw/display/virtio-gpu-gl.c +++ b/hw/display/virtio-gpu-gl.c @@ -170,6 +170,9 @@ static void virtio_gpu_gl_device_unrealize(DeviceState *qdev) if (gl->renderer_state >= RS_INITED) { #if VIRGL_VERSION_MAJOR >= 1 qemu_bh_delete(gl->cmdq_resume_bh); + + virtio_gpu_virgl_reset_async_fences(g); + qemu_bh_delete(gl->async_fence_bh); #endif if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { timer_free(gl->print_stats); diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index b32ce44ba2b1..ad6512987079 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -891,6 +891,7 @@ static void virgl_cmd_set_scanout_blob(VirtIOGPU *g, void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { + VirtIOGPUGL *gl = VIRTIO_GPU_GL(g); bool cmd_suspended = false; int ret; @@ -992,35 +993,117 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); - ret = virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, 0); - if (ret) - qemu_log_mask(LOG_GUEST_ERROR, - "%s: virgl_renderer_create_fence error: %s", - __func__, strerror(-ret)); + if (gl->context_fence_enabled && + (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX)) { + uint32_t flags = 0; + + ret = virgl_renderer_context_create_fence(cmd->cmd_hdr.ctx_id, flags, + cmd->cmd_hdr.ring_idx, + cmd->cmd_hdr.fence_id); + if (ret) + qemu_log_mask(LOG_GUEST_ERROR, + "%s: virgl_renderer_context_create_fence error: %s", + __func__, strerror(ret)); + } else { + ret = virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, 0); + if (ret) + qemu_log_mask(LOG_GUEST_ERROR, + "%s: virgl_renderer_create_fence error: %s", + __func__, strerror(-ret)); + } } -static void virgl_write_fence(void *opaque, uint32_t fence) +static void virtio_gpu_virgl_async_fence_bh(void *opaque) { - VirtIOGPU *g = opaque; + struct virtio_gpu_virgl_context_fence *f, *f_tmp; struct virtio_gpu_ctrl_command *cmd, *tmp; + VirtIOGPU *g = opaque; + VirtIOGPUGL *gl = VIRTIO_GPU_GL(g); - QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { - /* - * the guest can end up emitting fences out of order - * so we should check all fenced cmds not just the first one. - */ - if (cmd->cmd_hdr.fence_id > fence) { - continue; + qemu_mutex_lock(&gl->async_fence_lock); + + QSLIST_FOREACH_SAFE(f, &gl->async_fenceq, next, f_tmp) { + QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { + /* + * the guest can end up emitting fences out of order + * so we should check all fenced cmds not just the first one. 
+ */ + if (cmd->cmd_hdr.fence_id > f->fence_id) { + continue; + } + if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) { + if (cmd->cmd_hdr.ring_idx != f->ring_idx) { + continue; + } + if (cmd->cmd_hdr.ctx_id != f->ctx_id) { + continue; + } + } else if (f->ring_idx >= 0) { + /* ctx0 GL-query fences don't have ring info */ + continue; + } + virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); + QTAILQ_REMOVE(&g->fenceq, cmd, next); + g_free(cmd); } - trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id); - virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); - QTAILQ_REMOVE(&g->fenceq, cmd, next); - g_free(cmd); + + trace_virtio_gpu_fence_resp(f->fence_id); + QSLIST_REMOVE(&gl->async_fenceq, f, virtio_gpu_virgl_context_fence, + next); + g_free(f); g->inflight--; if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { trace_virtio_gpu_dec_inflight_fences(g->inflight); } } + + qemu_mutex_unlock(&gl->async_fence_lock); +} + +void virtio_gpu_virgl_reset_async_fences(VirtIOGPU *g) +{ + struct virtio_gpu_virgl_context_fence *f, *f_tmp; + VirtIOGPUGL *gl = VIRTIO_GPU_GL(g); + + QSLIST_FOREACH_SAFE(f, &gl->async_fenceq, next, f_tmp) { + QSLIST_REMOVE(&gl->async_fenceq, f, virtio_gpu_virgl_context_fence, + next); + g_free(f); + } +} + +static void +virtio_gpu_virgl_push_async_fence(VirtIOGPU *g, uint32_t ctx_id, + int64_t ring_idx, uint64_t fence_id) +{ + struct virtio_gpu_virgl_context_fence *f; + VirtIOGPUGL *gl = VIRTIO_GPU_GL(g); + + f = g_new(struct virtio_gpu_virgl_context_fence, 1); + f->ctx_id = ctx_id; + f->ring_idx = ring_idx; + f->fence_id = fence_id; + + qemu_mutex_lock(&gl->async_fence_lock); + QSLIST_INSERT_HEAD(&gl->async_fenceq, f, next); + qemu_mutex_unlock(&gl->async_fence_lock); + + qemu_bh_schedule(gl->async_fence_bh); +} + +static void virgl_write_fence(void *opaque, uint32_t fence) +{ + VirtIOGPU *g = opaque; + + virtio_gpu_virgl_push_async_fence(g, 0, -1, fence); +} + +static void virgl_write_context_fence(void *opaque, uint32_t ctx_id, + uint32_t ring_idx, uint64_t fence) +{ + VirtIOGPU *g = opaque; + + virtio_gpu_virgl_push_async_fence(g, ctx_id, ring_idx, fence); } static virgl_renderer_gl_context @@ -1110,6 +1193,8 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g) dpy_gfx_replace_surface(g->parent_obj.scanout[i].con, NULL); dpy_gl_scanout_disable(g->parent_obj.scanout[i].con); } + + virtio_gpu_virgl_reset_async_fences(g); } void virtio_gpu_virgl_reset(VirtIOGPU *g) @@ -1127,6 +1212,12 @@ int virtio_gpu_virgl_init(VirtIOGPU *g) if (qemu_egl_display) { virtio_gpu_3d_cbs.version = 4; virtio_gpu_3d_cbs.get_egl_display = virgl_get_egl_display; +#if VIRGL_VERSION_MAJOR >= 1 + virtio_gpu_3d_cbs.write_context_fence = virgl_write_context_fence; + flags |= VIRGL_RENDERER_ASYNC_FENCE_CB; + flags |= VIRGL_RENDERER_THREAD_SYNC; + gl->context_fence_enabled = true; +#endif } #endif #ifdef VIRGL_RENDERER_D3D11_SHARE_TEXTURE @@ -1160,6 +1251,13 @@ int virtio_gpu_virgl_init(VirtIOGPU *g) gl->cmdq_resume_bh = aio_bh_new(qemu_get_aio_context(), virtio_gpu_virgl_resume_cmdq_bh, g); + + gl->async_fence_bh = aio_bh_new(qemu_get_aio_context(), + virtio_gpu_virgl_async_fence_bh, + g); + + qemu_mutex_init(&gl->async_fence_lock); + QSLIST_INIT(&gl->async_fenceq); #endif return 0; diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index 230fa0c4ee0a..5673f0be85f4 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -229,6 +229,13 @@ struct VirtIOGPUClass { Error **errp); }; +struct virtio_gpu_virgl_context_fence { 
+ uint32_t ctx_id; + int64_t ring_idx; + uint64_t fence_id; + QSLIST_ENTRY(virtio_gpu_virgl_context_fence) next; +}; + /* VirtIOGPUGL renderer states */ typedef enum { RS_START, /* starting state */ @@ -246,6 +253,12 @@ struct VirtIOGPUGL { QEMUTimer *print_stats; QEMUBH *cmdq_resume_bh; + + QEMUBH *async_fence_bh; + QemuMutex async_fence_lock; + QSLIST_HEAD(, virtio_gpu_virgl_context_fence) async_fenceq; + + bool context_fence_enabled; }; struct VhostUserGPU { @@ -360,5 +373,6 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g); void virtio_gpu_virgl_reset(VirtIOGPU *g); int virtio_gpu_virgl_init(VirtIOGPU *g); GArray *virtio_gpu_virgl_get_capsets(VirtIOGPU *g); +void virtio_gpu_virgl_reset_async_fences(VirtIOGPU *g); #endif
Support the asynchronous fencing feature of virglrenderer. It allows QEMU to
handle a fence as soon as it is signalled instead of periodically polling the
fence status. This feature is required for enabling DRM context support in
QEMU because the legacy fencing mode isn't supported for DRM contexts in
virglrenderer.

Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
---
 hw/display/virtio-gpu-gl.c     |   3 +
 hw/display/virtio-gpu-virgl.c  | 134 ++++++++++++++++++++++++++++-----
 include/hw/virtio/virtio-gpu.h |  14 ++++
 3 files changed, 133 insertions(+), 18 deletions(-)