diff mbox series

[RFC,v4,11/11] Revert 'drm/amdgpu: annotate a false positive recursive locking'

Message ID 20220209002320.6077-12-andrey.grodzovsky@amd.com (mailing list archive)
State New, archived
Headers show
Series Define and use reset domain for GPU recovery in amdgpu | expand

Commit Message

Andrey Grodzovsky Feb. 9, 2022, 12:23 a.m. UTC
Since we have a single instance of the reset semaphore, which we
lock only once even for an XGMI hive, we don't need the nested
locking hint anymore.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

Comments

Christian König Feb. 9, 2022, 8:06 a.m. UTC | #1
Am 09.02.22 um 01:23 schrieb Andrey Grodzovsky:
> Since we have a single instance of reset semaphore which we
> lock only once even for XGMI hive we don't need the nested
> locking hint anymore.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Oh, yes please :)

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++----------
>   1 file changed, 4 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index aaecf0797484..75d0dd289023 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4825,16 +4825,10 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
>   	return r;
>   }
>   
> -static void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain,
> -					    struct amdgpu_hive_info *hive)
> +static void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain)
>   {
>   	atomic_set(&reset_domain->in_gpu_reset, 1);
> -
> -	if (hive) {
> -		down_write_nest_lock(&reset_domain->sem, &hive->hive_lock);
> -	} else {
> -		down_write(&reset_domain->sem);
> -	}
> +	down_write(&reset_domain->sem);
>   }
>   
>   static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
> @@ -5072,7 +5066,7 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
>   	/* We need to lock reset domain only once both for XGMI and single device */
>   	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
>   				    reset_list);
> -	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain, hive);
> +	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
>   
>   	/* block all schedulers and reset given job's ring */
>   	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
> @@ -5496,7 +5490,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
>   		 * Locking adev->reset_domain->sem will prevent any external access
>   		 * to GPU during PCI error recovery
>   		 */
> -		amdgpu_device_lock_reset_domain(adev->reset_domain, NULL);
> +		amdgpu_device_lock_reset_domain(adev->reset_domain);
>   		amdgpu_device_set_mp1_state(adev);
>   
>   		/*
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index aaecf0797484..75d0dd289023 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4825,16 +4825,10 @@  int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 	return r;
 }
 
-static void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain,
-					    struct amdgpu_hive_info *hive)
+static void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain)
 {
 	atomic_set(&reset_domain->in_gpu_reset, 1);
-
-	if (hive) {
-		down_write_nest_lock(&reset_domain->sem, &hive->hive_lock);
-	} else {
-		down_write(&reset_domain->sem);
-	}
+	down_write(&reset_domain->sem);
 }
 
 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
@@ -5072,7 +5066,7 @@  int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
 	/* We need to lock reset domain only once both for XGMI and single device */
 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
 				    reset_list);
-	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain, hive);
+	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
 
 	/* block all schedulers and reset given job's ring */
 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
@@ -5496,7 +5490,7 @@  pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
 		 * Locking adev->reset_domain->sem will prevent any external access
 		 * to GPU during PCI error recovery
 		 */
-		amdgpu_device_lock_reset_domain(adev->reset_domain, NULL);
+		amdgpu_device_lock_reset_domain(adev->reset_domain);
 		amdgpu_device_set_mp1_state(adev);
 
 		/*