diff mbox series

[RFC,v1,1/1] drm/virtio: Support fence-passing feature

Message ID 20231007194747.788934-2-dmitry.osipenko@collabora.com (mailing list archive)
State New, archived
Headers show
Series Support passing VirtIO-GPU fences to host for waiting | expand

Commit Message

Dmitry Osipenko Oct. 7, 2023, 7:47 p.m. UTC
Support extended version of VIRTIO_GPU_CMD_SUBMIT_3D command that allows
passing in-fence IDs to host for waiting, removing need to do expensive
host-guest roundtrips in a case of waiting for fences on a guest side.

Guest userspace must enable new VIRTGPU_CONTEXT_PARAM_FENCE_PASSING flag
and host must support new VIRTIO_GPU_F_FENCE_PASSING feature in order to
activate the fence passing for a given virtio-gpu context. Array of
in-fence IDs is then prepended to the VIRTIO_GPU_CMD_SUBMIT_3D's data,
the previously unused padding field of the command is reused for the
number of in-fences.

A new VIRTGPU_EXECBUF_SHARED_FENCE flag is added to the job submission
UAPI and must be set by userspace if it wants to make fence shareable
with/on host. Certain jobs won't want to share fence, in particular Venus
will benefit from this flag.

Link: https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1138
Link: https://gitlab.freedesktop.org/digetx/qemu/-/commits/native-context-iris
Link: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/4679609
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
---
 drivers/gpu/drm/virtio/virtgpu_drv.c    |  1 +
 drivers/gpu/drm/virtio/virtgpu_drv.h    | 11 ++-
 drivers/gpu/drm/virtio/virtgpu_fence.c  | 15 +++-
 drivers/gpu/drm/virtio/virtgpu_ioctl.c  | 11 ++-
 drivers/gpu/drm/virtio/virtgpu_kms.c    |  8 +-
 drivers/gpu/drm/virtio/virtgpu_submit.c | 99 ++++++++++++++++++++++++-
 drivers/gpu/drm/virtio/virtgpu_vq.c     |  7 +-
 include/uapi/drm/virtgpu_drm.h          |  3 +
 include/uapi/linux/virtio_gpu.h         | 11 ++-
 9 files changed, 152 insertions(+), 14 deletions(-)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index 644b8ee51009..544918bd38e9 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -148,6 +148,7 @@  static unsigned int features[] = {
 	VIRTIO_GPU_F_RESOURCE_UUID,
 	VIRTIO_GPU_F_RESOURCE_BLOB,
 	VIRTIO_GPU_F_CONTEXT_INIT,
+	VIRTIO_GPU_F_FENCE_PASSING,
 };
 static struct virtio_driver virtio_gpu_driver = {
 	.feature_table = features,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 8513b671f871..1dc503cb53de 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -149,6 +149,7 @@  struct virtio_gpu_fence {
 	struct virtio_gpu_fence_event *e;
 	struct virtio_gpu_fence_driver *drv;
 	struct list_head node;
+	bool host_shareable;
 };
 
 struct virtio_gpu_vbuffer {
@@ -246,6 +247,7 @@  struct virtio_gpu_device {
 	bool has_resource_blob;
 	bool has_host_visible;
 	bool has_context_init;
+	bool has_fence_passing;
 	struct virtio_shm_region host_visible_region;
 	struct drm_mm host_visible_mm;
 
@@ -273,6 +275,7 @@  struct virtio_gpu_fpriv {
 	uint32_t num_rings;
 	uint64_t base_fence_ctx;
 	uint64_t ring_idx_mask;
+	bool fence_passing_enabled;
 	struct mutex context_lock;
 };
 
@@ -367,7 +370,9 @@  void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
 			   void *data, uint32_t data_size,
 			   uint32_t ctx_id,
 			   struct virtio_gpu_object_array *objs,
-			   struct virtio_gpu_fence *fence);
+			   struct virtio_gpu_fence *fence,
+			   uint32_t cmd_size,
+			   unsigned int num_in_fences);
 void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev,
 					  uint32_t ctx_id,
 					  uint64_t offset, uint32_t level,
@@ -420,6 +425,9 @@  virtio_gpu_cmd_set_scanout_blob(struct virtio_gpu_device *vgdev,
 				uint32_t width, uint32_t height,
 				uint32_t x, uint32_t y);
 
+void virtio_gpu_cmd_in_fence(struct virtio_gpu_device *vgdev,
+			     uint32_t ctx_id, uint64_t fence_id);
+
 /* virtgpu_display.c */
 int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev);
 void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev);
@@ -439,6 +447,7 @@  void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
 			  struct virtio_gpu_fence *fence);
 void virtio_gpu_fence_event_process(struct virtio_gpu_device *vdev,
 				    u64 fence_id);
+struct virtio_gpu_fence *to_virtio_gpu_fence(struct dma_fence *dma_fence);
 
 /* virtgpu_object.c */
 void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo);
diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c
index f28357dbde35..1fd3cfeca2f5 100644
--- a/drivers/gpu/drm/virtio/virtgpu_fence.c
+++ b/drivers/gpu/drm/virtio/virtgpu_fence.c
@@ -27,9 +27,6 @@ 
 
 #include "virtgpu_drv.h"
 
-#define to_virtio_gpu_fence(x) \
-	container_of(x, struct virtio_gpu_fence, f)
-
 static const char *virtio_gpu_get_driver_name(struct dma_fence *f)
 {
 	return "virtio_gpu";
@@ -71,6 +68,14 @@  static const struct dma_fence_ops virtio_gpu_fence_ops = {
 	.timeline_value_str  = virtio_gpu_timeline_value_str,
 };
 
+struct virtio_gpu_fence *to_virtio_gpu_fence(struct dma_fence *dma_fence)
+{
+	if (dma_fence->ops != &virtio_gpu_fence_ops)
+		return NULL;
+
+	return container_of(dma_fence, struct virtio_gpu_fence, f);
+}
+
 struct virtio_gpu_fence *virtio_gpu_fence_alloc(struct virtio_gpu_device *vgdev,
 						uint64_t base_fence_ctx,
 						uint32_t ring_idx)
@@ -122,6 +127,10 @@  void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
 			cpu_to_le32(VIRTIO_GPU_FLAG_INFO_RING_IDX);
 		cmd_hdr->ring_idx = (u8)fence->ring_idx;
 	}
+
+	if (fence->host_shareable)
+		cmd_hdr->flags |=
+			cpu_to_le32(VIRTIO_GPU_FLAG_FENCE_SHAREABLE);
 }
 
 void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev,
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index b24b11f25197..3028786c59cd 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -514,7 +514,8 @@  static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev,
 			return PTR_ERR(buf);
 
 		virtio_gpu_cmd_submit(vgdev, buf, rc_blob->cmd_size,
-				      vfpriv->ctx_id, NULL, NULL);
+				      vfpriv->ctx_id, NULL, NULL,
+				      rc_blob->cmd_size, 0);
 	}
 
 	if (guest_blob)
@@ -642,6 +643,14 @@  static int virtio_gpu_context_init_ioctl(struct drm_device *dev,
 
 			vfpriv->ring_idx_mask = value;
 			break;
+		case VIRTGPU_CONTEXT_PARAM_FENCE_PASSING:
+			if (!vgdev->has_fence_passing && value) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+
+			vfpriv->fence_passing_enabled = !!value;
+			break;
 		default:
 			ret = -EINVAL;
 			goto out_unlock;
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 5a3b5aaed1f3..9f4617a75edd 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -197,12 +197,16 @@  int virtio_gpu_init(struct virtio_device *vdev, struct drm_device *dev)
 	if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_CONTEXT_INIT)) {
 		vgdev->has_context_init = true;
 	}
+	if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_FENCE_PASSING)) {
+		vgdev->has_fence_passing = true;
+	}
 
-	DRM_INFO("features: %cvirgl %cedid %cresource_blob %chost_visible",
+	DRM_INFO("features: %cvirgl %cedid %cresource_blob %chost_visible %cfence_passing",
 		 vgdev->has_virgl_3d    ? '+' : '-',
 		 vgdev->has_edid        ? '+' : '-',
 		 vgdev->has_resource_blob ? '+' : '-',
-		 vgdev->has_host_visible ? '+' : '-');
+		 vgdev->has_host_visible ? '+' : '-',
+		 vgdev->has_fence_passing ? '+' : '-');
 
 	DRM_INFO("features: %ccontext_init\n",
 		 vgdev->has_context_init ? '+' : '-');
diff --git a/drivers/gpu/drm/virtio/virtgpu_submit.c b/drivers/gpu/drm/virtio/virtgpu_submit.c
index 3c00135ead45..129d063029a6 100644
--- a/drivers/gpu/drm/virtio/virtgpu_submit.c
+++ b/drivers/gpu/drm/virtio/virtgpu_submit.c
@@ -25,6 +25,11 @@  struct virtio_gpu_submit_post_dep {
 	u64 point;
 };
 
+struct virtio_gpu_in_fence {
+	u64 id;
+	u32 context;
+};
+
 struct virtio_gpu_submit {
 	struct virtio_gpu_submit_post_dep *post_deps;
 	unsigned int num_out_syncobjs;
@@ -32,6 +37,9 @@  struct virtio_gpu_submit {
 	struct drm_syncobj **in_syncobjs;
 	unsigned int num_in_syncobjs;
 
+	struct virtio_gpu_in_fence *in_fences;
+	unsigned int num_in_fences;
+
 	struct virtio_gpu_object_array *buflist;
 	struct drm_virtgpu_execbuffer *exbuf;
 	struct virtio_gpu_fence *out_fence;
@@ -41,6 +49,8 @@  struct virtio_gpu_submit {
 	struct drm_file *file;
 	int out_fence_fd;
 	u64 fence_ctx;
+	u32 data_size;
+	u32 cmd_size;
 	u32 ring_idx;
 	void *buf;
 };
@@ -48,11 +58,44 @@  struct virtio_gpu_submit {
 static int virtio_gpu_do_fence_wait(struct virtio_gpu_submit *submit,
 				    struct dma_fence *in_fence)
 {
+	struct virtio_gpu_fence *fence = to_virtio_gpu_fence(in_fence);
 	u32 context = submit->fence_ctx + submit->ring_idx;
+	struct virtio_gpu_in_fence *vfence, *in_fences;
+	u32 i;
 
 	if (dma_fence_match_context(in_fence, context))
 		return 0;
 
+	if (fence && fence->host_shareable &&
+	    submit->vfpriv->fence_passing_enabled) {
+		/*
+		 * Merge sync_file + syncobj in-fences to avoid sending more
+		 * than one fence per-context to host. Use latest fence from
+		 * the same context.
+		 */
+		for (i = 0; i < submit->num_in_fences; i++) {
+			vfence = &submit->in_fences[i];
+
+			if (dma_fence_match_context(in_fence, vfence->context)) {
+				vfence->id = max(vfence->id, fence->fence_id);
+				return 0;
+			}
+		}
+
+		in_fences = krealloc_array(submit->in_fences,
+					   submit->num_in_fences + 1,
+					   sizeof(*in_fences), GFP_KERNEL);
+		if (!in_fences)
+			return -ENOMEM;
+
+		in_fences[submit->num_in_fences].id = fence->fence_id;
+		in_fences[submit->num_in_fences].context = context;
+		submit->in_fences = in_fences;
+		submit->num_in_fences++;
+
+		return 0;
+	}
+
 	return dma_fence_wait(in_fence, true);
 }
 
@@ -331,6 +374,7 @@  static void virtio_gpu_cleanup_submit(struct virtio_gpu_submit *submit)
 	virtio_gpu_reset_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs);
 	virtio_gpu_free_syncobjs(submit->in_syncobjs, submit->num_in_syncobjs);
 	virtio_gpu_free_post_deps(submit->post_deps, submit->num_out_syncobjs);
+	kfree(submit->in_fences);
 
 	if (!IS_ERR(submit->buf))
 		kvfree(submit->buf);
@@ -348,12 +392,51 @@  static void virtio_gpu_cleanup_submit(struct virtio_gpu_submit *submit)
 		fput(submit->sync_file->file);
 }
 
-static void virtio_gpu_submit(struct virtio_gpu_submit *submit)
+static int virtio_gpu_attach_in_fences(struct virtio_gpu_submit *submit)
 {
-	virtio_gpu_cmd_submit(submit->vgdev, submit->buf, submit->exbuf->size,
+	size_t in_fences_size = sizeof(u64) * submit->num_in_fences;
+	size_t new_data_size = submit->data_size + in_fences_size;
+	void *buf = submit->buf;
+	u64 *in_fences;
+	unsigned int i;
+
+	if (new_data_size < submit->data_size)
+		return -EINVAL;
+
+	buf = kvrealloc(buf, submit->data_size, new_data_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	memmove(buf + in_fences_size, buf, submit->data_size);
+	in_fences = buf;
+
+	for (i = 0; i < submit->num_in_fences; i++)
+		in_fences[i] = cpu_to_le64(submit->in_fences[i].id);
+
+	submit->data_size = new_data_size;
+	submit->buf = buf;
+
+	return 0;
+}
+
+static int virtio_gpu_submit(struct virtio_gpu_submit *submit)
+{
+	int err;
+
+	if (submit->num_in_fences) {
+		err = virtio_gpu_attach_in_fences(submit);
+		if (err)
+			return err;
+	}
+
+	virtio_gpu_cmd_submit(submit->vgdev, submit->buf, submit->data_size,
 			      submit->vfpriv->ctx_id, submit->buflist,
-			      submit->out_fence);
+			      submit->out_fence, submit->cmd_size,
+			      submit->num_in_fences);
+
 	virtio_gpu_notify(submit->vgdev);
+
+	return 0;
 }
 
 static void virtio_gpu_complete_submit(struct virtio_gpu_submit *submit)
@@ -401,6 +484,12 @@  static int virtio_gpu_init_submit(struct virtio_gpu_submit *submit,
 		}
 	}
 
+	if ((exbuf->flags & VIRTGPU_EXECBUF_SHARED_FENCE) &&
+	    vfpriv->fence_passing_enabled && out_fence)
+		out_fence->host_shareable = true;
+
+	submit->data_size = exbuf->size;
+	submit->cmd_size = exbuf->size;
 	submit->out_fence = out_fence;
 	submit->fence_ctx = fence_ctx;
 	submit->ring_idx = ring_idx;
@@ -527,7 +616,9 @@  int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto cleanup;
 
-	virtio_gpu_submit(&submit);
+	ret = virtio_gpu_submit(&submit);
+	if (ret)
+		goto cleanup;
 
 	/*
 	 * Set up usr-out data after submitting the job to optimize
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index b1a00c0c25a7..29d462b69bad 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -1079,7 +1079,9 @@  void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
 			   void *data, uint32_t data_size,
 			   uint32_t ctx_id,
 			   struct virtio_gpu_object_array *objs,
-			   struct virtio_gpu_fence *fence)
+			   struct virtio_gpu_fence *fence,
+			   uint32_t cmd_size,
+			   unsigned int num_in_fences)
 {
 	struct virtio_gpu_cmd_submit *cmd_p;
 	struct virtio_gpu_vbuffer *vbuf;
@@ -1093,7 +1095,8 @@  void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
 
 	cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_SUBMIT_3D);
 	cmd_p->hdr.ctx_id = cpu_to_le32(ctx_id);
-	cmd_p->size = cpu_to_le32(data_size);
+	cmd_p->size = cpu_to_le32(cmd_size);
+	cmd_p->num_in_fences = cpu_to_le32(num_in_fences);
 
 	virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence);
 }
diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index b1d0e56565bc..fd486fdf0441 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -52,10 +52,12 @@  extern "C" {
 #define VIRTGPU_EXECBUF_FENCE_FD_IN	0x01
 #define VIRTGPU_EXECBUF_FENCE_FD_OUT	0x02
 #define VIRTGPU_EXECBUF_RING_IDX	0x04
+#define VIRTGPU_EXECBUF_SHARED_FENCE	0x08
 #define VIRTGPU_EXECBUF_FLAGS  (\
 		VIRTGPU_EXECBUF_FENCE_FD_IN |\
 		VIRTGPU_EXECBUF_FENCE_FD_OUT |\
 		VIRTGPU_EXECBUF_RING_IDX |\
+		VIRTGPU_EXECBUF_SHARED_FENCE |\
 		0)
 
 struct drm_virtgpu_map {
@@ -198,6 +200,7 @@  struct drm_virtgpu_resource_create_blob {
 #define VIRTGPU_CONTEXT_PARAM_CAPSET_ID       0x0001
 #define VIRTGPU_CONTEXT_PARAM_NUM_RINGS       0x0002
 #define VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK 0x0003
+#define VIRTGPU_CONTEXT_PARAM_FENCE_PASSING   0x0004
 struct drm_virtgpu_context_set_param {
 	__u64 param;
 	__u64 value;
diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index f556fde07b76..c3182c8255cf 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -65,6 +65,11 @@ 
  */
 #define VIRTIO_GPU_F_CONTEXT_INIT        4
 
+/*
+ * VIRTIO_GPU_CMD_SUBMIT_3D
+ */
+#define VIRTIO_GPU_F_FENCE_PASSING       5
+
 enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_UNDEFINED = 0,
 
@@ -133,6 +138,10 @@  enum virtio_gpu_shm_id {
  * of the command ring that needs to used when creating the fence
  */
 #define VIRTIO_GPU_FLAG_INFO_RING_IDX (1 << 1)
+/*
+ * The fence is shareable between host contexts if flag is set.
+ */
+#define VIRTIO_GPU_FLAG_FENCE_SHAREABLE (1 << 2)
 
 struct virtio_gpu_ctrl_hdr {
 	__le32 type;
@@ -304,7 +313,7 @@  struct virtio_gpu_ctx_resource {
 struct virtio_gpu_cmd_submit {
 	struct virtio_gpu_ctrl_hdr hdr;
 	__le32 size;
-	__le32 padding;
+	__le32 num_in_fences;
 };
 
 #define VIRTIO_GPU_CAPSET_VIRGL 1