Message ID: 20230409123957.29553-4-dmitry.osipenko@collabora.com
State: New, archived
Series: Add sync object UAPI support to VirtIO-GPU driver
Hi Dmitry,

On Sun, 9 Apr 2023 at 13:40, Dmitry Osipenko
<dmitry.osipenko@collabora.com> wrote:

> +static void virtio_gpu_free_syncobjs(struct drm_syncobj **syncobjs,
> +                                     uint32_t nr_syncobjs)
> +{
> +        uint32_t i = nr_syncobjs;
> +
> +        while (i--) {
> +                if (syncobjs[i])
> +                        drm_syncobj_put(syncobjs[i]);
> +        }
> +
> +        kvfree(syncobjs);
> +}
> +
> +static void virtio_gpu_reset_syncobjs(struct drm_syncobj **syncobjs,
> +                                      uint32_t nr_syncobjs)
> +{
> +        uint32_t i;
> +
> +        for (i = 0; i < nr_syncobjs; i++) {
> +                if (syncobjs[i])
> +                        drm_syncobj_replace_fence(syncobjs[i], NULL);
> +        }
> +}
> +

Can I bribe you (with cookies) into dropping the NULL checks above?
They're dead code and rather misleading IMHO.

> +static void
> +virtio_gpu_free_post_deps(struct virtio_gpu_submit_post_dep *post_deps,
> +                          uint32_t nr_syncobjs)
> +{
> +        uint32_t i = nr_syncobjs;
> +
> +        while (i--) {
> +                kfree(post_deps[i].chain);
> +                drm_syncobj_put(post_deps[i].syncobj);
> +        }
> +
> +        kvfree(post_deps);
> +}
> +
> +static int virtio_gpu_parse_post_deps(struct virtio_gpu_submit *submit)
> +{
> +        struct drm_virtgpu_execbuffer *exbuf = submit->exbuf;
> +        struct drm_virtgpu_execbuffer_syncobj syncobj_desc;
> +        struct virtio_gpu_submit_post_dep *post_deps;
> +        u32 num_out_syncobjs = exbuf->num_out_syncobjs;
> +        size_t syncobj_stride = exbuf->syncobj_stride;
> +        int ret = 0, i;
> +
> +        if (!num_out_syncobjs)
> +                return 0;
> +
> +        post_deps = kvcalloc(num_out_syncobjs, sizeof(*post_deps), GFP_KERNEL);
> +        if (!post_deps)
> +                return -ENOMEM;
> +
> +        for (i = 0; i < num_out_syncobjs; i++) {
> +                uint64_t address = exbuf->out_syncobjs + i * syncobj_stride;
> +

For my own education: what are the specifics/requirements behind the
stride? Is there a use-case for the stride to vary across in/out
syncobjs?

Off the top of my head: userspace could have an array of larger
structs, each embedding a syncobj. Thus, by passing the stride, the
kernel will fetch/update them in-place w/o caring about the other
data.

Or perhaps there is another trick that userspace utilises the stride for?

> +                if (copy_from_user(&syncobj_desc,
> +                                   u64_to_user_ptr(address),
> +                                   min(syncobj_stride, sizeof(syncobj_desc)))) {
> +                        ret = -EFAULT;
> +                        break;
> +                }
> +

We seem to be parsing garbage(?) stack data in the syncobj_stride <
sizeof(syncobj_desc) case.

Zeroing/resetting the syncobj_desc on each iteration is one approach -
be that fully or in part. Alternatively, we could error out on
syncobj_stride < sizeof(syncobj_desc).

> +                post_deps[i].point = syncobj_desc.point;
> +
> +                if (syncobj_desc.flags) {
> +                        ret = -EINVAL;
> +                        break;
> +                }
> +
> +                if (syncobj_desc.point) {
> +                        post_deps[i].chain = dma_fence_chain_alloc();
> +                        if (!post_deps[i].chain) {
> +                                ret = -ENOMEM;
> +                                break;
> +                        }
> +                }
> +
> +                post_deps[i].syncobj = drm_syncobj_find(submit->file,
> +                                                        syncobj_desc.handle);
> +                if (!post_deps[i].syncobj) {
> +                        ret = -EINVAL;

I think we want a kfree(chain) here. Otherwise we'll leak it, right?

> +                        break;
> +                }
> +        }
> +
> +        if (ret) {
> +                virtio_gpu_free_post_deps(post_deps, i);
> +                return ret;
> +        }
> +
> +        submit->num_out_syncobjs = num_out_syncobjs;
> +        submit->post_deps = post_deps;
> +
> +        return 0;
> +}
> +

With the two issues in virtio_gpu_parse_post_deps() addressed, the series is:
Reviewed-by: Emil Velikov <emil.velikov@collabora.com>

HTH
Emil
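For illustration, here is a minimal sketch of the virtio_gpu_parse_post_deps()
loop body with the two fixes requested above folded in (zeroing the descriptor
and freeing the chain on the lookup failure path). The names follow the patch;
the exact final form is up to the author:

        for (i = 0; i < num_out_syncobjs; i++) {
                uint64_t address = exbuf->out_syncobjs + i * syncobj_stride;

                /* Zero the descriptor so a short copy (syncobj_stride <
                 * sizeof(syncobj_desc)) can't leave stale stack data in
                 * the tail of the struct.
                 */
                memset(&syncobj_desc, 0, sizeof(syncobj_desc));

                if (copy_from_user(&syncobj_desc, u64_to_user_ptr(address),
                                   min(syncobj_stride, sizeof(syncobj_desc)))) {
                        ret = -EFAULT;
                        break;
                }

                /* ... flags/point handling unchanged ... */

                post_deps[i].syncobj = drm_syncobj_find(submit->file,
                                                        syncobj_desc.handle);
                if (!post_deps[i].syncobj) {
                        /* Free the chain allocated above instead of leaking it. */
                        kfree(post_deps[i].chain);
                        ret = -EINVAL;
                        break;
                }
        }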
Hello,

On 4/11/23 14:07, Emil Velikov wrote:
> Hi Dmitry,
>
> On Sun, 9 Apr 2023 at 13:40, Dmitry Osipenko
> <dmitry.osipenko@collabora.com> wrote:
>
>> +static void virtio_gpu_free_syncobjs(struct drm_syncobj **syncobjs,
>> +                                     uint32_t nr_syncobjs)
>> +{
>> +        uint32_t i = nr_syncobjs;
>> +
>> +        while (i--) {
>> +                if (syncobjs[i])
>> +                        drm_syncobj_put(syncobjs[i]);
>> +        }
>> +
>> +        kvfree(syncobjs);
>> +}
>> +
>
>> +static void virtio_gpu_reset_syncobjs(struct drm_syncobj **syncobjs,
>> +                                      uint32_t nr_syncobjs)
>> +{
>> +        uint32_t i;
>> +
>> +        for (i = 0; i < nr_syncobjs; i++) {
>> +                if (syncobjs[i])
>> +                        drm_syncobj_replace_fence(syncobjs[i], NULL);
>> +        }
>> +}
>> +
>
> Can I bribe you (with cookies) into dropping the NULL checks above?
> They're dead code and rather misleading IMHO.

When userspace doesn't set the VIRTGPU_EXECBUF_SYNCOBJ_RESET flag in
virtio_gpu_parse_deps(), syncobjs[i] is NULL. Hence it's not dead code
at all :)

>> +static void
>> +virtio_gpu_free_post_deps(struct virtio_gpu_submit_post_dep *post_deps,
>> +                          uint32_t nr_syncobjs)
>> +{
>> +        uint32_t i = nr_syncobjs;
>> +
>> +        while (i--) {
>> +                kfree(post_deps[i].chain);
>> +                drm_syncobj_put(post_deps[i].syncobj);
>> +        }
>> +
>> +        kvfree(post_deps);
>> +}
>> +
>> +static int virtio_gpu_parse_post_deps(struct virtio_gpu_submit *submit)
>> +{
>> +        struct drm_virtgpu_execbuffer *exbuf = submit->exbuf;
>> +        struct drm_virtgpu_execbuffer_syncobj syncobj_desc;
>> +        struct virtio_gpu_submit_post_dep *post_deps;
>> +        u32 num_out_syncobjs = exbuf->num_out_syncobjs;
>> +        size_t syncobj_stride = exbuf->syncobj_stride;
>> +        int ret = 0, i;
>> +
>> +        if (!num_out_syncobjs)
>> +                return 0;
>> +
>> +        post_deps = kvcalloc(num_out_syncobjs, sizeof(*post_deps), GFP_KERNEL);
>> +        if (!post_deps)
>> +                return -ENOMEM;
>> +
>> +        for (i = 0; i < num_out_syncobjs; i++) {
>> +                uint64_t address = exbuf->out_syncobjs + i * syncobj_stride;
>> +
>
> For my own education: what are the specifics/requirements behind the
> stride? Is there a use-case for the stride to vary across in/out
> syncobjs?

Stride is the same for in/out syncobjs because the same struct
drm_virtgpu_execbuffer_syncobj is used by both. The out-syncobjs don't
use the "flags" field of drm_virtgpu_execbuffer_syncobj.

We could use separate strides and descs for in/out syncobjs, but in
practice it's unlikely that we will be extending the desc anytime soon,
and usually there are not many out-syncobjs, so the wasted field doesn't
matter much. On the other hand, if we ever need to extend the desc for
in-syncobjs, there will be more wasted fields. Maybe it indeed won't
hurt to split the in/out syncobjs, for consistency. I'll think about it
for v6, thanks.

> Off the top of my head: userspace could have an array of larger
> structs, each embedding a syncobj. Thus, by passing the stride, the
> kernel will fetch/update them in-place w/o caring about the other
> data.
>
> Or perhaps there is another trick that userspace utilises the stride for?

Stride is only about potential future expansion of the struct
drm_virtgpu_execbuffer_syncobj with new fields. There shouldn't be any
special tricks for userspace to use.

>> +                if (copy_from_user(&syncobj_desc,
>> +                                   u64_to_user_ptr(address),
>> +                                   min(syncobj_stride, sizeof(syncobj_desc)))) {
>> +                        ret = -EFAULT;
>> +                        break;
>> +                }
>> +
>
> We seem to be parsing garbage(?) stack data in the syncobj_stride <
> sizeof(syncobj_desc) case.
>
> Zeroing/resetting the syncobj_desc on each iteration is one approach -
> be that fully or in part. Alternatively, we could error out on
> syncobj_stride < sizeof(syncobj_desc).

Good catch! It indeed needs to be zeroed. Nothing terrible will happen
today in the kernel if it uses garbage data, but a malfunctioning
userspace may happen to appear to work properly.

>> +                post_deps[i].point = syncobj_desc.point;
>> +
>> +                if (syncobj_desc.flags) {
>> +                        ret = -EINVAL;
>> +                        break;
>> +                }
>> +
>> +                if (syncobj_desc.point) {
>> +                        post_deps[i].chain = dma_fence_chain_alloc();
>> +                        if (!post_deps[i].chain) {
>> +                                ret = -ENOMEM;
>> +                                break;
>> +                        }
>> +                }
>> +
>> +                post_deps[i].syncobj = drm_syncobj_find(submit->file,
>> +                                                        syncobj_desc.handle);
>> +                if (!post_deps[i].syncobj) {
>> +                        ret = -EINVAL;
>
> I think we want a kfree(chain) here. Otherwise we'll leak it, right?

I'm sure there was a kfree here in one of the previous versions of the
patch. Another good catch, thanks :)

>> +                        break;
>> +                }
>> +        }
>> +
>> +        if (ret) {
>> +                virtio_gpu_free_post_deps(post_deps, i);
>> +                return ret;
>> +        }
>> +
>> +        submit->num_out_syncobjs = num_out_syncobjs;
>> +        submit->post_deps = post_deps;
>> +
>> +        return 0;
>> +}
>> +
>
> With the two issues in virtio_gpu_parse_post_deps() addressed, the series is:
> Reviewed-by: Emil Velikov <emil.velikov@collabora.com>

Thank you for the review!
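To make the forward-compatibility argument concrete, a hypothetical sketch
(the v2 struct, its extra field, and NUM_SYNCOBJS are made up and not part of
this series): if the descriptor ever grows, new userspace passes the larger
stride, while an older kernel copies only min(syncobj_stride,
sizeof(syncobj_desc)) bytes per element, so both sides of the version skew
keep working:

        /* Made-up future revision of the descriptor, for illustration only. */
        struct drm_virtgpu_execbuffer_syncobj_v2 {
                __u32 handle;
                __u32 flags;
                __u64 point;
                __u64 hypothetical_new_field;   /* not in the real UAPI */
        };

        struct drm_virtgpu_execbuffer_syncobj_v2 descs[NUM_SYNCOBJS];

        /* New userspace advertises the larger descriptor via the stride... */
        exbuf.syncobj_stride = sizeof(struct drm_virtgpu_execbuffer_syncobj_v2);
        exbuf.num_in_syncobjs = NUM_SYNCOBJS;
        exbuf.in_syncobjs = (uintptr_t)descs;
        /* ...and an older kernel simply ignores the trailing bytes of each
         * array element, while a newer kernel can consume the new field.
         */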
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index add075681e18..a22155577152 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -176,7 +176,8 @@ static const struct drm_driver driver = {
         * If KMS is disabled DRIVER_MODESET and DRIVER_ATOMIC are masked
         * out via drm_device::driver_features:
         */
-       .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC,
+       .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC |
+                          DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE,
 
        .open = virtio_gpu_driver_open,
        .postclose = virtio_gpu_driver_postclose,
diff --git a/drivers/gpu/drm/virtio/virtgpu_submit.c b/drivers/gpu/drm/virtio/virtgpu_submit.c
index b60dea077240..b936cae1884e 100644
--- a/drivers/gpu/drm/virtio/virtgpu_submit.c
+++ b/drivers/gpu/drm/virtio/virtgpu_submit.c
@@ -14,11 +14,24 @@
 #include <linux/uaccess.h>
 
 #include <drm/drm_file.h>
+#include <drm/drm_syncobj.h>
 #include <drm/virtgpu_drm.h>
 
 #include "virtgpu_drv.h"
 
+struct virtio_gpu_submit_post_dep {
+       struct drm_syncobj *syncobj;
+       struct dma_fence_chain *chain;
+       uint64_t point;
+};
+
 struct virtio_gpu_submit {
+       struct virtio_gpu_submit_post_dep *post_deps;
+       unsigned int num_out_syncobjs;
+
+       struct drm_syncobj **in_syncobjs;
+       unsigned int num_in_syncobjs;
+
        struct virtio_gpu_object_array *buflist;
        struct drm_virtgpu_execbuffer *exbuf;
        struct virtio_gpu_fence *out_fence;
@@ -59,6 +72,194 @@ static int virtio_gpu_dma_fence_wait(struct virtio_gpu_submit *submit,
        return 0;
 }
 
+static void virtio_gpu_free_syncobjs(struct drm_syncobj **syncobjs,
+                                    uint32_t nr_syncobjs)
+{
+       uint32_t i = nr_syncobjs;
+
+       while (i--) {
+               if (syncobjs[i])
+                       drm_syncobj_put(syncobjs[i]);
+       }
+
+       kvfree(syncobjs);
+}
+
+static int
+virtio_gpu_parse_deps(struct virtio_gpu_submit *submit)
+{
+       struct drm_virtgpu_execbuffer *exbuf = submit->exbuf;
+       struct drm_virtgpu_execbuffer_syncobj syncobj_desc;
+       size_t syncobj_stride = exbuf->syncobj_stride;
+       u32 num_in_syncobjs = exbuf->num_in_syncobjs;
+       struct drm_syncobj **syncobjs;
+       int ret = 0, i;
+
+       if (!num_in_syncobjs)
+               return 0;
+
+       /*
+        * kvcalloc() first tries to allocate memory using kmalloc and
+        * falls back to vmalloc only on failure. It also uses __GFP_NOWARN
+        * internally for allocations larger than a page size, preventing
+        * a storm of KMSG warnings.
+        */
+       syncobjs = kvcalloc(num_in_syncobjs, sizeof(*syncobjs), GFP_KERNEL);
+       if (!syncobjs)
+               return -ENOMEM;
+
+       for (i = 0; i < num_in_syncobjs; i++) {
+               uint64_t address = exbuf->in_syncobjs + i * syncobj_stride;
+               struct dma_fence *fence;
+
+               if (copy_from_user(&syncobj_desc,
+                                  u64_to_user_ptr(address),
+                                  min(syncobj_stride, sizeof(syncobj_desc)))) {
+                       ret = -EFAULT;
+                       break;
+               }
+
+               if (syncobj_desc.flags & ~VIRTGPU_EXECBUF_SYNCOBJ_FLAGS) {
+                       ret = -EINVAL;
+                       break;
+               }
+
+               ret = drm_syncobj_find_fence(submit->file, syncobj_desc.handle,
+                                            syncobj_desc.point, 0, &fence);
+               if (ret)
+                       break;
+
+               ret = virtio_gpu_dma_fence_wait(submit, fence);
+
+               dma_fence_put(fence);
+               if (ret)
+                       break;
+
+               if (syncobj_desc.flags & VIRTGPU_EXECBUF_SYNCOBJ_RESET) {
+                       syncobjs[i] = drm_syncobj_find(submit->file,
+                                                      syncobj_desc.handle);
+                       if (!syncobjs[i]) {
+                               ret = -EINVAL;
+                               break;
+                       }
+               }
+       }
+
+       if (ret) {
+               virtio_gpu_free_syncobjs(syncobjs, i);
+               return ret;
+       }
+
+       submit->num_in_syncobjs = num_in_syncobjs;
+       submit->in_syncobjs = syncobjs;
+
+       return ret;
+}
+
+static void virtio_gpu_reset_syncobjs(struct drm_syncobj **syncobjs,
+                                     uint32_t nr_syncobjs)
+{
+       uint32_t i;
+
+       for (i = 0; i < nr_syncobjs; i++) {
+               if (syncobjs[i])
+                       drm_syncobj_replace_fence(syncobjs[i], NULL);
+       }
+}
+
+static void
+virtio_gpu_free_post_deps(struct virtio_gpu_submit_post_dep *post_deps,
+                         uint32_t nr_syncobjs)
+{
+       uint32_t i = nr_syncobjs;
+
+       while (i--) {
+               kfree(post_deps[i].chain);
+               drm_syncobj_put(post_deps[i].syncobj);
+       }
+
+       kvfree(post_deps);
+}
+
+static int virtio_gpu_parse_post_deps(struct virtio_gpu_submit *submit)
+{
+       struct drm_virtgpu_execbuffer *exbuf = submit->exbuf;
+       struct drm_virtgpu_execbuffer_syncobj syncobj_desc;
+       struct virtio_gpu_submit_post_dep *post_deps;
+       u32 num_out_syncobjs = exbuf->num_out_syncobjs;
+       size_t syncobj_stride = exbuf->syncobj_stride;
+       int ret = 0, i;
+
+       if (!num_out_syncobjs)
+               return 0;
+
+       post_deps = kvcalloc(num_out_syncobjs, sizeof(*post_deps), GFP_KERNEL);
+       if (!post_deps)
+               return -ENOMEM;
+
+       for (i = 0; i < num_out_syncobjs; i++) {
+               uint64_t address = exbuf->out_syncobjs + i * syncobj_stride;
+
+               if (copy_from_user(&syncobj_desc,
+                                  u64_to_user_ptr(address),
+                                  min(syncobj_stride, sizeof(syncobj_desc)))) {
+                       ret = -EFAULT;
+                       break;
+               }
+
+               post_deps[i].point = syncobj_desc.point;
+
+               if (syncobj_desc.flags) {
+                       ret = -EINVAL;
+                       break;
+               }
+
+               if (syncobj_desc.point) {
+                       post_deps[i].chain = dma_fence_chain_alloc();
+                       if (!post_deps[i].chain) {
+                               ret = -ENOMEM;
+                               break;
+                       }
+               }
+
+               post_deps[i].syncobj = drm_syncobj_find(submit->file,
+                                                       syncobj_desc.handle);
+               if (!post_deps[i].syncobj) {
+                       ret = -EINVAL;
+                       break;
+               }
+       }
+
+       if (ret) {
+               virtio_gpu_free_post_deps(post_deps, i);
+               return ret;
+       }
+
+       submit->num_out_syncobjs = num_out_syncobjs;
+       submit->post_deps = post_deps;
+
+       return 0;
+}
+
+static void
+virtio_gpu_process_post_deps(struct virtio_gpu_submit *submit)
+{
+       struct virtio_gpu_submit_post_dep *post_deps = submit->post_deps;
+       struct dma_fence *fence = &submit->out_fence->f;
+       uint32_t i;
+
+       for (i = 0; i < submit->num_out_syncobjs; i++) {
+               if (post_deps[i].chain) {
+                       drm_syncobj_add_point(post_deps[i].syncobj,
+                                             post_deps[i].chain,
+                                             fence, post_deps[i].point);
+                       post_deps[i].chain = NULL;
+               } else {
+                       drm_syncobj_replace_fence(post_deps[i].syncobj, fence);
+               }
+       }
+}
+
 static int virtio_gpu_fence_event_create(struct drm_device *dev,
                                         struct drm_file *file,
                                         struct virtio_gpu_fence *fence,
@@ -122,6 +323,10 @@ static int virtio_gpu_init_submit_buflist(struct virtio_gpu_submit *submit)
 
 static void virtio_gpu_cleanup_submit(struct virtio_gpu_submit *submit)
 {
+       virtio_gpu_reset_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs);
+       virtio_gpu_free_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs);
+       virtio_gpu_free_post_deps(submit->post_deps, submit->num_out_syncobjs);
+
        if (!IS_ERR(submit->buf))
                kvfree(submit->buf);
 
@@ -288,6 +493,14 @@ int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
         * optimize the path by proceeding directly to the submission
         * to virtio after the waits.
         */
+       ret = virtio_gpu_parse_post_deps(&submit);
+       if (ret)
+               goto cleanup;
+
+       ret = virtio_gpu_parse_deps(&submit);
+       if (ret)
+               goto cleanup;
+
        ret = virtio_gpu_wait_in_fence(&submit);
        if (ret)
                goto cleanup;
@@ -303,6 +516,7 @@ int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
         * the job submission path.
         */
        virtio_gpu_install_out_fence_fd(&submit);
+       virtio_gpu_process_post_deps(&submit);
        virtio_gpu_complete_submit(&submit);
 cleanup:
        virtio_gpu_cleanup_submit(&submit);
diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index 7b158fcb02b4..ce4948aacafd 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -64,6 +64,16 @@ struct drm_virtgpu_map {
        __u32 pad;
 };
 
+#define VIRTGPU_EXECBUF_SYNCOBJ_RESET          0x01
+#define VIRTGPU_EXECBUF_SYNCOBJ_FLAGS ( \
+               VIRTGPU_EXECBUF_SYNCOBJ_RESET | \
+               0)
+struct drm_virtgpu_execbuffer_syncobj {
+       __u32 handle;
+       __u32 flags;
+       __u64 point;
+};
+
 /* fence_fd is modified on success if VIRTGPU_EXECBUF_FENCE_FD_OUT flag is set. */
 struct drm_virtgpu_execbuffer {
        __u32 flags;
@@ -73,7 +83,11 @@ struct drm_virtgpu_execbuffer {
        __u32 num_bo_handles;
        __s32 fence_fd; /* in/out fence fd (see VIRTGPU_EXECBUF_FENCE_FD_IN/OUT) */
        __u32 ring_idx; /* command ring index (see VIRTGPU_EXECBUF_RING_IDX) */
-       __u32 pad;
+       __u32 syncobj_stride;
+       __u32 num_in_syncobjs;
+       __u32 num_out_syncobjs;
+       __u64 in_syncobjs;
+       __u64 out_syncobjs;
 };
 
 #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
Add sync object DRM UAPI support to the VirtIO-GPU driver. Sync object
support is needed by the native context VirtIO-GPU Mesa drivers; it will
also be used by the Venus and Virgl contexts.

Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
---
 drivers/gpu/drm/virtio/virtgpu_drv.c    |   3 +-
 drivers/gpu/drm/virtio/virtgpu_submit.c | 214 ++++++++++++++++++++++++
 include/uapi/drm/virtgpu_drm.h          |  16 +-
 3 files changed, 231 insertions(+), 2 deletions(-)
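For completeness, a hypothetical userspace usage sketch of the new UAPI.
The helper name and the direct ioctl() call (no EINTR retry) are made up
for illustration; the struct fields and the VIRTGPU_EXECBUF_SYNCOBJ_RESET
flag come from the patch. It waits on one in-syncobj (resetting it after
the wait) and signals a timeline point on one out-syncobj:

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/virtgpu_drm.h>    /* may live under libdrm's include path */

/* Hypothetical helper: submit `cmd` after waiting on `in_handle`, and
 * signal `out_handle` at `out_point` when the job completes.
 */
static int submit_with_syncobjs(int drm_fd, uint32_t in_handle,
                                uint32_t out_handle, uint64_t out_point,
                                void *cmd, uint32_t cmd_size)
{
        struct drm_virtgpu_execbuffer_syncobj in = {
                .handle = in_handle,
                .flags = VIRTGPU_EXECBUF_SYNCOBJ_RESET, /* reset after wait */
        };
        struct drm_virtgpu_execbuffer_syncobj out = {
                .handle = out_handle,
                .point = out_point,     /* non-zero point => timeline syncobj */
        };
        struct drm_virtgpu_execbuffer eb = {
                .command = (uintptr_t)cmd,
                .size = cmd_size,
                .syncobj_stride = sizeof(struct drm_virtgpu_execbuffer_syncobj),
                .num_in_syncobjs = 1,
                .in_syncobjs = (uintptr_t)&in,
                .num_out_syncobjs = 1,
                .out_syncobjs = (uintptr_t)&out,
        };

        return ioctl(drm_fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &eb);
}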