diff mbox series

[2/6] drm/amdkfd: Implement DMA buf fd export from KFD

Message ID 20230112013157.750568-3-Felix.Kuehling@amd.com (mailing list archive)
State New, archived
Headers show
Series Enable KFD to use render node BO mappings | expand

Commit Message

Felix Kuehling Jan. 12, 2023, 1:31 a.m. UTC
Export a DMA-buf fd for a given KFD buffer handle. This makes it
possible to import KFD BOs into GEM contexts and so leverage the
amdgpu_bo_va API for more flexible virtual address mappings. It will
also be used by the new upstreamable RDMA solution coming to UCX and
RCCL.

The corresponding user mode change (Thunk API and kfdtest) is here:
https://github.com/fxkamd/ROCT-Thunk-Interface/commits/fxkamd/dmabuf

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 45 +++++++++++----
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 55 +++++++++++++++++++
 include/uapi/linux/kfd_ioctl.h                | 14 ++++-
 4 files changed, 104 insertions(+), 12 deletions(-)

Comments

Chen, Xiaogang Jan. 13, 2023, 8:03 a.m. UTC | #1
Reviewed-by: Xiaogang Chen <Xiaogang.Chen@amd.com>

Regards

Xiaogang

On 1/11/2023 7:31 PM, Felix Kuehling wrote:
> Exports a DMA buf fd of a given KFD buffer handle. This is intended for
> being able to import KFD BOs into GEM contexts to leverage the
> amdgpu_bo_va API for more flexible virtual address mappings. It will
> also be used for the new upstreamable RDMA solution coming to UCX and
> RCCL.
>
> The corresponding user mode change (Thunk API and kfdtest) is here:
> https://github.com/fxkamd/ROCT-Thunk-Interface/commits/fxkamd/dmabuf
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 +
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 45 +++++++++++----
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 55 +++++++++++++++++++
>   include/uapi/linux/kfd_ioctl.h                | 14 ++++-
>   4 files changed, 104 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 333780491867..01ba3589b60a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -308,6 +308,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
>   				      uint64_t va, void *drm_priv,
>   				      struct kgd_mem **mem, uint64_t *size,
>   				      uint64_t *mmap_offset);
> +int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
> +				      struct dma_buf **dmabuf);
>   int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
>   				struct tile_config *config);
>   void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index e13c3493b786..5645103beed0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -710,6 +710,21 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
>   	}
>   }
>   
> +static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
> +{
> +	if (!mem->dmabuf) {
> +		struct dma_buf *ret = amdgpu_gem_prime_export(
> +			&mem->bo->tbo.base,
> +			mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
> +				DRM_RDWR : 0);
> +		if (IS_ERR(ret))
> +			return PTR_ERR(ret);
> +		mem->dmabuf = ret;
> +	}
> +
> +	return 0;
> +}
> +
>   static int
>   kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
>   		      struct amdgpu_bo **bo)
> @@ -717,16 +732,9 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
>   	struct drm_gem_object *gobj;
>   	int ret;
>   
> -	if (!mem->dmabuf) {
> -		mem->dmabuf = amdgpu_gem_prime_export(&mem->bo->tbo.base,
> -			mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
> -				DRM_RDWR : 0);
> -		if (IS_ERR(mem->dmabuf)) {
> -			ret = PTR_ERR(mem->dmabuf);
> -			mem->dmabuf = NULL;
> -			return ret;
> -		}
> -	}
> +	ret = kfd_mem_export_dmabuf(mem);
> +	if (ret)
> +		return ret;
>   
>   	gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
>   	if (IS_ERR(gobj))
> @@ -2267,6 +2275,23 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
>   	return ret;
>   }
>   
> +int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
> +				      struct dma_buf **dma_buf)
> +{
> +	int ret;
> +
> +	mutex_lock(&mem->lock);
> +	ret = kfd_mem_export_dmabuf(mem);
> +	if (ret)
> +		goto out;
> +
> +	get_dma_buf(mem->dmabuf);
> +	*dma_buf = mem->dmabuf;
> +out:
> +	mutex_unlock(&mem->lock);
> +	return ret;
> +}
> +
>   /* Evict a userptr BO by stopping the queues if necessary
>    *
>    * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index f79b8e964140..bcf2263927d6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1572,6 +1572,58 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
>   	return r;
>   }
>   
> +static int kfd_ioctl_export_dmabuf(struct file *filep,
> +				   struct kfd_process *p, void *data)
> +{
> +	struct kfd_ioctl_export_dmabuf_args *args = data;
> +	struct kfd_process_device *pdd;
> +	struct dma_buf *dmabuf;
> +	struct kfd_dev *dev;
> +	void *mem;
> +	int ret = 0;
> +
> +	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
> +	if (!dev)
> +		return -EINVAL;
> +
> +	mutex_lock(&p->mutex);
> +
> +	pdd = kfd_get_process_device_data(dev, p);
> +	if (!pdd) {
> +		ret = -EINVAL;
> +		goto err_unlock;
> +	}
> +
> +	mem = kfd_process_device_translate_handle(pdd,
> +						GET_IDR_HANDLE(args->handle));
> +	if (!mem) {
> +		ret = -EINVAL;
> +		goto err_unlock;
> +	}
> +
> +	ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);
> +	mutex_unlock(&p->mutex);
> +	if (ret)
> +		goto err_out;
> +
> +	ret = dma_buf_fd(dmabuf, args->flags);
> +	if (ret < 0) {
> +		dma_buf_put(dmabuf);
> +		goto err_out;
> +	}
> +	/* dma_buf_fd assigns the reference count to the fd, no need to
> +	 * put the reference here.
> +	 */
> +	args->dmabuf_fd = ret;
> +
> +	return 0;
> +
> +err_unlock:
> +	mutex_unlock(&p->mutex);
> +err_out:
> +	return ret;
> +}
> +
>   /* Handle requests for watching SMI events */
>   static int kfd_ioctl_smi_events(struct file *filep,
>   				struct kfd_process *p, void *data)
> @@ -2754,6 +2806,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
>   
>   	AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
>   			kfd_ioctl_get_available_memory, 0),
> +
> +	AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF,
> +				kfd_ioctl_export_dmabuf, 0),
>   };
>   
>   #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index 42b60198b6c5..2da5c3ad71bd 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -37,9 +37,10 @@
>    * - 1.9 - Add available memory ioctl
>    * - 1.10 - Add SMI profiler event log
>    * - 1.11 - Add unified memory for ctx save/restore area
> + * - 1.12 - Add DMA buf export ioctl
>    */
>   #define KFD_IOCTL_MAJOR_VERSION 1
> -#define KFD_IOCTL_MINOR_VERSION 11
> +#define KFD_IOCTL_MINOR_VERSION 12
>   
>   struct kfd_ioctl_get_version_args {
>   	__u32 major_version;	/* from KFD */
> @@ -463,6 +464,12 @@ struct kfd_ioctl_import_dmabuf_args {
>   	__u32 dmabuf_fd;	/* to KFD */
>   };
>   
> +struct kfd_ioctl_export_dmabuf_args {
> +	__u64 handle;		/* to KFD */
> +	__u32 flags;		/* to KFD */
> +	__u32 dmabuf_fd;	/* from KFD */
> +};
> +
>   /*
>    * KFD SMI(System Management Interface) events
>    */
> @@ -877,7 +884,10 @@ struct kfd_ioctl_set_xnack_mode_args {
>   #define AMDKFD_IOC_AVAILABLE_MEMORY		\
>   		AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args)
>   
> +#define AMDKFD_IOC_EXPORT_DMABUF		\
> +		AMDKFD_IOWR(0x24, struct kfd_ioctl_export_dmabuf_args)
> +
>   #define AMDKFD_COMMAND_START		0x01
> -#define AMDKFD_COMMAND_END		0x24
> +#define AMDKFD_COMMAND_END		0x25
>   
>   #endif
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 333780491867..01ba3589b60a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -308,6 +308,8 @@  int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
 				      uint64_t va, void *drm_priv,
 				      struct kgd_mem **mem, uint64_t *size,
 				      uint64_t *mmap_offset);
+int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
+				      struct dma_buf **dmabuf);
 int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
 				struct tile_config *config);
 void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index e13c3493b786..5645103beed0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -710,6 +710,21 @@  kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 	}
 }
 
+static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
+{
+	if (!mem->dmabuf) {
+		struct dma_buf *ret = amdgpu_gem_prime_export(
+			&mem->bo->tbo.base,
+			mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+				DRM_RDWR : 0);
+		if (IS_ERR(ret))
+			return PTR_ERR(ret);
+		mem->dmabuf = ret;
+	}
+
+	return 0;
+}
+
 static int
 kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
 		      struct amdgpu_bo **bo)
@@ -717,16 +732,9 @@  kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
 	struct drm_gem_object *gobj;
 	int ret;
 
-	if (!mem->dmabuf) {
-		mem->dmabuf = amdgpu_gem_prime_export(&mem->bo->tbo.base,
-			mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
-				DRM_RDWR : 0);
-		if (IS_ERR(mem->dmabuf)) {
-			ret = PTR_ERR(mem->dmabuf);
-			mem->dmabuf = NULL;
-			return ret;
-		}
-	}
+	ret = kfd_mem_export_dmabuf(mem);
+	if (ret)
+		return ret;
 
 	gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
 	if (IS_ERR(gobj))
@@ -2267,6 +2275,23 @@  int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
 	return ret;
 }
 
+int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
+				      struct dma_buf **dma_buf)
+{
+	int ret;
+
+	mutex_lock(&mem->lock);
+	ret = kfd_mem_export_dmabuf(mem);
+	if (ret)
+		goto out;
+
+	get_dma_buf(mem->dmabuf);
+	*dma_buf = mem->dmabuf;
+out:
+	mutex_unlock(&mem->lock);
+	return ret;
+}
+
 /* Evict a userptr BO by stopping the queues if necessary
  *
  * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f79b8e964140..bcf2263927d6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1572,6 +1572,58 @@  static int kfd_ioctl_import_dmabuf(struct file *filep,
 	return r;
 }
 
+static int kfd_ioctl_export_dmabuf(struct file *filep,
+				   struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_export_dmabuf_args *args = data;
+	struct kfd_process_device *pdd;
+	struct dma_buf *dmabuf;
+	struct kfd_dev *dev;
+	void *mem;
+	int ret = 0;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);
+	mutex_unlock(&p->mutex);
+	if (ret)
+		goto err_out;
+
+	ret = dma_buf_fd(dmabuf, args->flags);
+	if (ret < 0) {
+		dma_buf_put(dmabuf);
+		goto err_out;
+	}
+	/* dma_buf_fd assigns the reference count to the fd, no need to
+	 * put the reference here.
+	 */
+	args->dmabuf_fd = ret;
+
+	return 0;
+
+err_unlock:
+	mutex_unlock(&p->mutex);
+err_out:
+	return ret;
+}
+
 /* Handle requests for watching SMI events */
 static int kfd_ioctl_smi_events(struct file *filep,
 				struct kfd_process *p, void *data)
@@ -2754,6 +2806,9 @@  static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
 			kfd_ioctl_get_available_memory, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF,
+				kfd_ioctl_export_dmabuf, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 42b60198b6c5..2da5c3ad71bd 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -37,9 +37,10 @@ 
  * - 1.9 - Add available memory ioctl
  * - 1.10 - Add SMI profiler event log
  * - 1.11 - Add unified memory for ctx save/restore area
+ * - 1.12 - Add DMA buf export ioctl
  */
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 11
+#define KFD_IOCTL_MINOR_VERSION 12
 
 struct kfd_ioctl_get_version_args {
 	__u32 major_version;	/* from KFD */
@@ -463,6 +464,12 @@  struct kfd_ioctl_import_dmabuf_args {
 	__u32 dmabuf_fd;	/* to KFD */
 };
 
+struct kfd_ioctl_export_dmabuf_args {
+	__u64 handle;		/* to KFD */
+	__u32 flags;		/* to KFD */
+	__u32 dmabuf_fd;	/* from KFD */
+};
+
 /*
  * KFD SMI(System Management Interface) events
  */
@@ -877,7 +884,10 @@  struct kfd_ioctl_set_xnack_mode_args {
 #define AMDKFD_IOC_AVAILABLE_MEMORY		\
 		AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args)
 
+#define AMDKFD_IOC_EXPORT_DMABUF		\
+		AMDKFD_IOWR(0x24, struct kfd_ioctl_export_dmabuf_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x24
+#define AMDKFD_COMMAND_END		0x25
 
 #endif