diff mbox series

[v4,08/14] drm/amdgpu: Fix a bunch of sdma code crashes post device unplug

Message ID 1611003683-3534-9-git-send-email-andrey.grodzovsky@amd.com (mailing list archive)
State New, archived
Headers show
Series RFC Support hot device unplug in amdgpu | expand

Commit Message

Andrey Grodzovsky Jan. 18, 2021, 9:01 p.m. UTC
We can't allocate and submit IBs post device unplug.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

Comments

Christian König Jan. 19, 2021, 8:51 a.m. UTC | #1
Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
> We can't allocate and submit IBs post device unplug.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 +++++++-
>   1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ad91c0c..5096351 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -31,6 +31,7 @@
>   #include <linux/dma-buf.h>
>   
>   #include <drm/amdgpu_drm.h>
> +#include <drm/drm_drv.h>
>   #include "amdgpu.h"
>   #include "amdgpu_trace.h"
>   #include "amdgpu_amdkfd.h"
> @@ -1604,7 +1605,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	struct amdgpu_vm_update_params params;
>   	enum amdgpu_sync_mode sync_mode;
>   	uint64_t pfn;
> -	int r;
> +	int r, idx;
> +
> +	if (!drm_dev_enter(&adev->ddev, &idx))
> +		return -ENOENT;

Why not -ENODEV?

>   
>   	memset(&params, 0, sizeof(params));
>   	params.adev = adev;
> @@ -1647,6 +1651,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	if (r)
>   		goto error_unlock;
>   
> +
> +	drm_dev_exit(idx);

That's too early. You probably need to do this much further below, after
the commit.

Christian.

>   	do {
>   		uint64_t tmp, num_entries, addr;
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ad91c0c..5096351 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -31,6 +31,7 @@ 
 #include <linux/dma-buf.h>
 
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
@@ -1604,7 +1605,10 @@  static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	struct amdgpu_vm_update_params params;
 	enum amdgpu_sync_mode sync_mode;
 	uint64_t pfn;
-	int r;
+	int r, idx;
+
+	if (!drm_dev_enter(&adev->ddev, &idx))
+		return -ENOENT;
 
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
@@ -1647,6 +1651,8 @@  static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_unlock;
 
+
+	drm_dev_exit(idx);
 	do {
 		uint64_t tmp, num_entries, addr;