diff mbox series

[4/6] drm/amdgpu: Wait for all clients importing our dma-bufs.

Message ID 1589050310-19666-5-git-send-email-andrey.grodzovsky@amd.com (mailing list archive)
State New, archived
Headers show
Series RFC Support hot device unplug in amdgpu | expand

Commit Message

Andrey Grodzovsky May 9, 2020, 6:51 p.m. UTC
Also avoid GPU recovery if the device is unplugged.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 27 +++++++++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  4 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  9 +++++++++
 4 files changed, 38 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 79274d5..f212622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -992,6 +992,7 @@  struct amdgpu_device {
 	char				serial[16];
 
 	wait_queue_head_t		user_clients_done;
+	atomic_t 			exported_dma_bufs_count;
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index ffeb20f..479ff98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -36,6 +36,7 @@ 
 #include "amdgpu_gem.h"
 #include "amdgpu_dma_buf.h"
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
 #include <linux/dma-buf.h>
 #include <linux/dma-fence-array.h>
 
@@ -116,6 +117,7 @@  int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
 		return ret;
 
 	ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
+
 	drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data);
 
 	return ret;
@@ -179,6 +181,9 @@  static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	int r;
 
+	if (drm_dev_is_unplugged(adev->ddev))
+		return -ENODEV;
+
 	if (attach->dev->driver == adev->dev->driver)
 		return 0;
 
@@ -363,6 +368,19 @@  static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 	return ret;
 }
 
+
+static void amdgpu_dma_buf_release(struct dma_buf *dma_buf)
+{
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+	drm_gem_dmabuf_release(dma_buf);
+
+	atomic_dec(&adev->exported_dma_bufs_count);
+	wake_up(&adev->user_clients_done);
+
+}
+
 const struct dma_buf_ops amdgpu_dmabuf_ops = {
 	.attach = amdgpu_dma_buf_attach,
 	.detach = amdgpu_dma_buf_detach,
@@ -370,13 +388,14 @@  const struct dma_buf_ops amdgpu_dmabuf_ops = {
 	.unpin = amdgpu_dma_buf_unpin,
 	.map_dma_buf = amdgpu_dma_buf_map,
 	.unmap_dma_buf = amdgpu_dma_buf_unmap,
-	.release = drm_gem_dmabuf_release,
+	.release = amdgpu_dma_buf_release,
 	.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
 	.mmap = drm_gem_dmabuf_mmap,
 	.vmap = drm_gem_dmabuf_vmap,
 	.vunmap = drm_gem_dmabuf_vunmap,
 };
 
+
 /**
  * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
  * @gobj: GEM BO
@@ -391,6 +410,7 @@  struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
 					int flags)
 {
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct dma_buf *buf;
 
 	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
@@ -398,8 +418,10 @@  struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
 		return ERR_PTR(-EPERM);
 
 	buf = drm_gem_prime_export(gobj, flags);
-	if (!IS_ERR(buf))
+	if (!IS_ERR(buf)) {
 		buf->ops = &amdgpu_dmabuf_ops;
+		atomic_inc(&((adev)->exported_dma_bufs_count));
+	}
 
 	return buf;
 }
@@ -558,5 +580,6 @@  struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
 
 	get_dma_buf(dma_buf);
 	obj->import_attach = attach;
+
 	return obj;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0531727..11410a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1146,7 +1146,9 @@  amdgpu_pci_remove(struct pci_dev *pdev)
 	drm_dev_unplug(dev);
 
 	amdgpu_force_unmap_user_space_mappings(dev);
-	wait_event(adev->user_clients_done, (dev->open_count == 0));
+	wait_event(adev->user_clients_done,
+		   !atomic_read(&dev->open_count) &&
+		   !atomic_read(&adev->exported_dma_bufs_count));
 
 	amdgpu_driver_unload_kms(dev);
 	pci_disable_device(pdev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4720718..20cf36d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,6 +28,9 @@ 
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
+#include <drm/drm_drv.h>
+
+
 static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 {
 	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
@@ -37,6 +40,12 @@  static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 
 	memset(&ti, 0, sizeof(struct amdgpu_task_info));
 
+
+	if (drm_dev_is_unplugged(adev->ddev)) {
+		DRM_WARN("amdgpu_job_timedout - device is unplugged, skiping!");
+		return;
+	}
+
 	if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
 		DRM_ERROR("ring %s timeout, but soft recovered\n",
 			  s_job->sched->name);