diff mbox series

[RFC,v4,09/11] drm/amdgpu: Move in_gpu_reset into reset_domain

Message ID 20220209002320.6077-10-andrey.grodzovsky@amd.com (mailing list archive)
State New, archived
Headers show
Series Define and use reset domain for GPU recovery in amdgpu | expand

Commit Message

Andrey Grodzovsky Feb. 9, 2022, 12:23 a.m. UTC
We should have a single instance per entrire reset domain.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Suggested-by: Lijo Lazar <lijo.lazar@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  7 ++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c  |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h  |  1 +
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c      |  4 ++--
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c      |  4 ++--
 6 files changed, 15 insertions(+), 12 deletions(-)

Comments

Christian König Feb. 9, 2022, 8 a.m. UTC | #1
Am 09.02.22 um 01:23 schrieb Andrey Grodzovsky:
> We should have a single instance per entrire reset domain.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> Suggested-by: Lijo Lazar <lijo.lazar@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  7 ++-----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +++++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c  |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h  |  1 +
>   drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c      |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c      |  4 ++--
>   6 files changed, 15 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index ddfbcc8fd3d3..b89406b01694 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1056,7 +1056,6 @@ struct amdgpu_device {
>   	bool				in_s4;
>   	bool				in_s0ix;
>   
> -	atomic_t 			in_gpu_reset;
>   	enum pp_mp1_state               mp1_state;
>   	struct amdgpu_doorbell_index doorbell_index;
>   
> @@ -1463,8 +1462,6 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
>          return adev->gmc.tmz_enabled;
>   }
>   
> -static inline int amdgpu_in_reset(struct amdgpu_device *adev)
> -{
> -	return atomic_read(&adev->in_gpu_reset);
> -}
> +int amdgpu_in_reset(struct amdgpu_device *adev);
> +
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index dcbb175d336f..e05d7cbefd2c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3554,7 +3554,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   	mutex_init(&adev->mn_lock);
>   	mutex_init(&adev->virt.vf_errors.lock);
>   	hash_init(adev->mn_hash);
> -	atomic_set(&adev->in_gpu_reset, 0);
>   	mutex_init(&adev->psp.mutex);
>   	mutex_init(&adev->notifier_lock);
>   
> @@ -4829,7 +4828,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
>   static void amdgpu_device_lock_adev(struct amdgpu_device *adev,
>   				struct amdgpu_hive_info *hive)
>   {
> -	atomic_set(&adev->in_gpu_reset, 1);
> +	atomic_set(&adev->reset_domain->in_gpu_reset, 1);
>   
>   	if (hive) {
>   		down_write_nest_lock(&adev->reset_domain->sem, &hive->hive_lock);
> @@ -4854,7 +4853,7 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
>   {
>   	amdgpu_vf_error_trans_all(adev);
>   	adev->mp1_state = PP_MP1_STATE_NONE;
> -	atomic_set(&adev->in_gpu_reset, 0);
> +	atomic_set(&adev->reset_domain->in_gpu_reset, 0);
>   	up_write(&adev->reset_domain->sem);
>   }
>   
> @@ -5699,6 +5698,11 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
>   	amdgpu_asic_invalidate_hdp(adev, ring);
>   }
>   
> +int amdgpu_in_reset(struct amdgpu_device *adev)
> +{
> +	return atomic_read(&adev->reset_domain->in_gpu_reset);
> +	}
> +	
>   /**
>    * amdgpu_device_halt() - bring hardware to some kind of halt state
>    *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> index c0988c804459..5ab72c3bfbda 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> @@ -131,6 +131,7 @@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
>   
>   	}
>   
> +	atomic_set(&reset_domain->in_gpu_reset, 0);
>   	init_rwsem(&reset_domain->sem);
>   
>   	return reset_domain;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
> index 80f918e87d4f..ea6fc98ea927 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
> @@ -81,6 +81,7 @@ struct amdgpu_reset_domain {
>   	struct workqueue_struct *wq;
>   	enum amdgpu_reset_domain_type type;
>   	struct rw_semaphore sem;
> +	atomic_t in_gpu_reset;
>   };
>   
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> index 4e23c29e665c..b81acf59870c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> @@ -259,7 +259,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
>   	 * otherwise the mailbox msg will be ruined/reseted by
>   	 * the VF FLR.
>   	 */
> -	if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
> +	if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
>   		return;
>   
>   	down_write(&adev->reset_domain->sem);
> @@ -277,7 +277,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
>   	} while (timeout > 1);
>   
>   flr_done:
> -	atomic_set(&adev->in_gpu_reset, 0);
> +	atomic_set(&adev->reset_domain->in_gpu_reset, 0);
>   	up_write(&adev->reset_domain->sem);
>   
>   	/* Trigger recovery for world switch failure if no TDR */
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> index f715780f7d20..22c10b97ea81 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> @@ -283,7 +283,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
>   	 * otherwise the mailbox msg will be ruined/reseted by
>   	 * the VF FLR.
>   	 */
> -	if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
> +	if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
>   		return;
>   
>   	down_write(&adev->reset_domain->sem);
> @@ -301,7 +301,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
>   	} while (timeout > 1);
>   
>   flr_done:
> -	atomic_set(&adev->in_gpu_reset, 0);
> +	atomic_set(&adev->reset_domain->in_gpu_reset, 0);
>   	up_write(&adev->reset_domain->sem);
>   
>   	/* Trigger recovery for world switch failure if no TDR */
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ddfbcc8fd3d3..b89406b01694 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1056,7 +1056,6 @@  struct amdgpu_device {
 	bool				in_s4;
 	bool				in_s0ix;
 
-	atomic_t 			in_gpu_reset;
 	enum pp_mp1_state               mp1_state;
 	struct amdgpu_doorbell_index doorbell_index;
 
@@ -1463,8 +1462,6 @@  static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
        return adev->gmc.tmz_enabled;
 }
 
-static inline int amdgpu_in_reset(struct amdgpu_device *adev)
-{
-	return atomic_read(&adev->in_gpu_reset);
-}
+int amdgpu_in_reset(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index dcbb175d336f..e05d7cbefd2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3554,7 +3554,6 @@  int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->mn_lock);
 	mutex_init(&adev->virt.vf_errors.lock);
 	hash_init(adev->mn_hash);
-	atomic_set(&adev->in_gpu_reset, 0);
 	mutex_init(&adev->psp.mutex);
 	mutex_init(&adev->notifier_lock);
 
@@ -4829,7 +4828,7 @@  int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 static void amdgpu_device_lock_adev(struct amdgpu_device *adev,
 				struct amdgpu_hive_info *hive)
 {
-	atomic_set(&adev->in_gpu_reset, 1);
+	atomic_set(&adev->reset_domain->in_gpu_reset, 1);
 
 	if (hive) {
 		down_write_nest_lock(&adev->reset_domain->sem, &hive->hive_lock);
@@ -4854,7 +4853,7 @@  static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
 {
 	amdgpu_vf_error_trans_all(adev);
 	adev->mp1_state = PP_MP1_STATE_NONE;
-	atomic_set(&adev->in_gpu_reset, 0);
+	atomic_set(&adev->reset_domain->in_gpu_reset, 0);
 	up_write(&adev->reset_domain->sem);
 }
 
@@ -5699,6 +5698,11 @@  void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
 	amdgpu_asic_invalidate_hdp(adev, ring);
 }
 
+int amdgpu_in_reset(struct amdgpu_device *adev)
+{
+	return atomic_read(&adev->reset_domain->in_gpu_reset);
+	}
+	
 /**
  * amdgpu_device_halt() - bring hardware to some kind of halt state
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index c0988c804459..5ab72c3bfbda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -131,6 +131,7 @@  struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
 
 	}
 
+	atomic_set(&reset_domain->in_gpu_reset, 0);
 	init_rwsem(&reset_domain->sem);
 
 	return reset_domain;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 80f918e87d4f..ea6fc98ea927 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -81,6 +81,7 @@  struct amdgpu_reset_domain {
 	struct workqueue_struct *wq;
 	enum amdgpu_reset_domain_type type;
 	struct rw_semaphore sem;
+	atomic_t in_gpu_reset;
 };
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 4e23c29e665c..b81acf59870c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -259,7 +259,7 @@  static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 	 * otherwise the mailbox msg will be ruined/reseted by
 	 * the VF FLR.
 	 */
-	if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
+	if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
 		return;
 
 	down_write(&adev->reset_domain->sem);
@@ -277,7 +277,7 @@  static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 	} while (timeout > 1);
 
 flr_done:
-	atomic_set(&adev->in_gpu_reset, 0);
+	atomic_set(&adev->reset_domain->in_gpu_reset, 0);
 	up_write(&adev->reset_domain->sem);
 
 	/* Trigger recovery for world switch failure if no TDR */
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index f715780f7d20..22c10b97ea81 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -283,7 +283,7 @@  static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
 	 * otherwise the mailbox msg will be ruined/reseted by
 	 * the VF FLR.
 	 */
-	if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
+	if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
 		return;
 
 	down_write(&adev->reset_domain->sem);
@@ -301,7 +301,7 @@  static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
 	} while (timeout > 1);
 
 flr_done:
-	atomic_set(&adev->in_gpu_reset, 0);
+	atomic_set(&adev->reset_domain->in_gpu_reset, 0);
 	up_write(&adev->reset_domain->sem);
 
 	/* Trigger recovery for world switch failure if no TDR */