Message ID | 1554739692-6999-3-git-send-email-andrey.grodzovsky@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/3] drm/scheduler: rework job destruction | expand |
Am 08.04.19 um 18:08 schrieb Andrey Grodzovsky: > Also reject TDRs if another one already running. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 94 +++++++++++++++++++++--------- > 1 file changed, 67 insertions(+), 27 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index aabd043..4446892 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -3327,10 +3327,12 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) > > static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, > struct amdgpu_job *job, > - bool *need_full_reset_arg) > + bool *need_full_reset_arg, > + bool *job_signaled) > { > int i, r = 0; > bool need_full_reset = *need_full_reset_arg; > + struct amdgpu_ring *job_ring = job ? to_amdgpu_ring(job->base.sched) : NULL; > > /* block all schedulers and reset given job's ring */ > for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { > @@ -3341,6 +3343,17 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, > > drm_sched_stop(&ring->sched, &job->base); > > + /* > + * Must check guilty signal here since after this point all old > + * HW fences are force signaled. > + * > + * job->base holds a reference to parent fence > + */ > + if (job_signaled && job && ring == job_ring && > + job->base.s_fence->parent && > + dma_fence_is_signaled(job->base.s_fence->parent)) > + *job_signaled = true; > + That won't work correctly. See when the guilty job is not on the first scheduler, you would already have force completed some before getting here. Better to stop all schedulers first and then do the check. Christian. 
> /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ > amdgpu_fence_driver_force_completion(ring); > } > @@ -3358,7 +3371,8 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, > > > > - if (!amdgpu_sriov_vf(adev)) { > + /* Don't suspend on bare metal if we are not going to HW reset the ASIC */ > + if (!amdgpu_sriov_vf(adev) && !(*job_signaled)) { > > if (!need_full_reset) > need_full_reset = amdgpu_device_ip_need_full_reset(adev); > @@ -3495,7 +3509,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, > return r; > } > > -static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev) > +static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev, bool job_signaled) > { > int i; > > @@ -3505,7 +3519,8 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev) > if (!ring || !ring->sched.thread) > continue; > > - if (!adev->asic_reset_res) > + /* No point to resubmit jobs if we didn't HW reset*/ > + if (!adev->asic_reset_res && !job_signaled) > drm_sched_resubmit_jobs(&ring->sched); > > drm_sched_start(&ring->sched, !adev->asic_reset_res); > @@ -3518,14 +3533,21 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev) > adev->asic_reset_res = 0; > } > > -static void amdgpu_device_lock_adev(struct amdgpu_device *adev) > +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) > { > - mutex_lock(&adev->lock_reset); > + if (trylock) { > + if (!mutex_trylock(&adev->lock_reset)) > + return false; > + } else > + mutex_lock(&adev->lock_reset); > + > atomic_inc(&adev->gpu_reset_counter); > adev->in_gpu_reset = 1; > /* Block kfd: SRIOV would do it separately */ > if (!amdgpu_sriov_vf(adev)) > amdgpu_amdkfd_pre_reset(adev); > + > + return true; > } > > static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) > @@ -3555,29 +3577,44 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, > { > int r; > struct amdgpu_hive_info *hive = 
NULL; > - bool need_full_reset = false; > struct amdgpu_device *tmp_adev = NULL; > struct list_head device_list, *device_list_handle = NULL; > + bool xgmi_topology_present, need_full_reset, job_signaled; > > + need_full_reset = job_signaled = false; > INIT_LIST_HEAD(&device_list); > > dev_info(adev->dev, "GPU reset begin!\n"); > > + hive = amdgpu_get_xgmi_hive(adev, 0); > + xgmi_topology_present = hive && adev->gmc.xgmi.num_physical_nodes > 1; > + > /* > - * In case of XGMI hive disallow concurrent resets to be triggered > - * by different nodes. No point also since the one node already executing > - * reset will also reset all the other nodes in the hive. > + * Here we trylock to avoid chain of resets executing from > + * either trigger by jobs on different adevs in XGMI hive or jobs on > + * different schedulers for same device while this tdr is running. > + * We always reset all schedulers for device and all devices for XGMI > + * hive so that should take care of them too. > */ > - hive = amdgpu_get_xgmi_hive(adev, 0); > - if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && > - !mutex_trylock(&hive->reset_lock)) > + > + if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) { > + DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", > + job->base.id, hive->hive_id); > return 0; > + } > > /* Start with adev pre asic reset first for soft reset check.*/ > - amdgpu_device_lock_adev(adev); > + if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) { > + DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress", > + job->base.id); > + return 0; > + } > + > + /* Guilty job will be freed after this*/ > r = amdgpu_device_pre_asic_reset(adev, > job, > - &need_full_reset); > + &need_full_reset, > + &job_signaled); > if (r) { > /*TODO Should we stop ?*/ > DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", > @@ -3609,10 +3646,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, > if 
(tmp_adev == adev) > continue; > > - amdgpu_device_lock_adev(tmp_adev); > + amdgpu_device_lock_adev(tmp_adev, false); > r = amdgpu_device_pre_asic_reset(tmp_adev, > NULL, > - &need_full_reset); > + &need_full_reset, > + &job_signaled); > /*TODO Should we stop ?*/ > if (r) { > DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", > @@ -3623,19 +3661,21 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, > > /* Actual ASIC resets if needed.*/ > /* TODO Implement XGMI hive reset logic for SRIOV */ > - if (amdgpu_sriov_vf(adev)) { > - r = amdgpu_device_reset_sriov(adev, job ? false : true); > - if (r) > - adev->asic_reset_res = r; > - } else { > - r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset); > - if (r && r == -EAGAIN) > - goto retry; > + if (!job || !job_signaled) { > + if (amdgpu_sriov_vf(adev)) { > + r = amdgpu_device_reset_sriov(adev, job ? false : true); > + if (r) > + adev->asic_reset_res = r; > + } else { > + r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset); > + if (r && r == -EAGAIN) > + goto retry; > + } > } > > /* Post ASIC reset for all devs .*/ > list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { > - amdgpu_device_post_asic_reset(tmp_adev); > + amdgpu_device_post_asic_reset(tmp_adev, job_signaled); > > if (r) { > /* bad news, how to tell it to userspace ? */ > @@ -3648,7 +3688,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, > amdgpu_device_unlock_adev(tmp_adev); > } > > - if (hive && adev->gmc.xgmi.num_physical_nodes > 1) > + if (xgmi_topology_present) > mutex_unlock(&hive->reset_lock); > > if (r)
On 4/9/19 10:50 AM, Christian König wrote: > Am 08.04.19 um 18:08 schrieb Andrey Grodzovsky: >> Also reject TDRs if another one already running. >> >> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 94 >> +++++++++++++++++++++--------- >> 1 file changed, 67 insertions(+), 27 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> index aabd043..4446892 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> @@ -3327,10 +3327,12 @@ bool amdgpu_device_should_recover_gpu(struct >> amdgpu_device *adev) >> static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, >> struct amdgpu_job *job, >> - bool *need_full_reset_arg) >> + bool *need_full_reset_arg, >> + bool *job_signaled) >> { >> int i, r = 0; >> bool need_full_reset = *need_full_reset_arg; >> + struct amdgpu_ring *job_ring = job ? >> to_amdgpu_ring(job->base.sched) : NULL; >> /* block all schedulers and reset given job's ring */ >> for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { >> @@ -3341,6 +3343,17 @@ static int amdgpu_device_pre_asic_reset(struct >> amdgpu_device *adev, >> drm_sched_stop(&ring->sched, &job->base); >> + /* >> + * Must check guilty signal here since after this point all old >> + * HW fences are force signaled. >> + * >> + * job->base holds a reference to parent fence >> + */ >> + if (job_signaled && job && ring == job_ring && >> + job->base.s_fence->parent && >> + dma_fence_is_signaled(job->base.s_fence->parent)) >> + *job_signaled = true; >> + > > That won't work correctly. See when the guilty job is not on the first > scheduler, you would already have force completed some before getting > here. > > Better to stop all schedulers first and then do the check. > > Christian. What do you mean by first scheduler ? There is one scheduler object per ring so I am not clear what 'first' means here. 
Andrey > >> /* after all hw jobs are reset, hw fence is meaningless, so >> force_completion */ >> amdgpu_fence_driver_force_completion(ring); >> } >> @@ -3358,7 +3371,8 @@ static int amdgpu_device_pre_asic_reset(struct >> amdgpu_device *adev, >> - if (!amdgpu_sriov_vf(adev)) { >> + /* Don't suspend on bare metal if we are not going to HW reset >> the ASIC */ >> + if (!amdgpu_sriov_vf(adev) && !(*job_signaled)) { >> if (!need_full_reset) >> need_full_reset = amdgpu_device_ip_need_full_reset(adev); >> @@ -3495,7 +3509,7 @@ static int amdgpu_do_asic_reset(struct >> amdgpu_hive_info *hive, >> return r; >> } >> -static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev) >> +static void amdgpu_device_post_asic_reset(struct amdgpu_device >> *adev, bool job_signaled) >> { >> int i; >> @@ -3505,7 +3519,8 @@ static void >> amdgpu_device_post_asic_reset(struct amdgpu_device *adev) >> if (!ring || !ring->sched.thread) >> continue; >> - if (!adev->asic_reset_res) >> + /* No point to resubmit jobs if we didn't HW reset*/ >> + if (!adev->asic_reset_res && !job_signaled) >> drm_sched_resubmit_jobs(&ring->sched); >> drm_sched_start(&ring->sched, !adev->asic_reset_res); >> @@ -3518,14 +3533,21 @@ static void >> amdgpu_device_post_asic_reset(struct amdgpu_device *adev) >> adev->asic_reset_res = 0; >> } >> -static void amdgpu_device_lock_adev(struct amdgpu_device *adev) >> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool >> trylock) >> { >> - mutex_lock(&adev->lock_reset); >> + if (trylock) { >> + if (!mutex_trylock(&adev->lock_reset)) >> + return false; >> + } else >> + mutex_lock(&adev->lock_reset); >> + >> atomic_inc(&adev->gpu_reset_counter); >> adev->in_gpu_reset = 1; >> /* Block kfd: SRIOV would do it separately */ >> if (!amdgpu_sriov_vf(adev)) >> amdgpu_amdkfd_pre_reset(adev); >> + >> + return true; >> } >> static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) >> @@ -3555,29 +3577,44 @@ int amdgpu_device_gpu_recover(struct >> 
amdgpu_device *adev, >> { >> int r; >> struct amdgpu_hive_info *hive = NULL; >> - bool need_full_reset = false; >> struct amdgpu_device *tmp_adev = NULL; >> struct list_head device_list, *device_list_handle = NULL; >> + bool xgmi_topology_present, need_full_reset, job_signaled; >> + need_full_reset = job_signaled = false; >> INIT_LIST_HEAD(&device_list); >> dev_info(adev->dev, "GPU reset begin!\n"); >> + hive = amdgpu_get_xgmi_hive(adev, 0); >> + xgmi_topology_present = hive && >> adev->gmc.xgmi.num_physical_nodes > 1; >> + >> /* >> - * In case of XGMI hive disallow concurrent resets to be triggered >> - * by different nodes. No point also since the one node already >> executing >> - * reset will also reset all the other nodes in the hive. >> + * Here we trylock to avoid chain of resets executing from >> + * either trigger by jobs on different adevs in XGMI hive or >> jobs on >> + * different schedulers for same device while this tdr is running. >> + * We always reset all schedulers for device and all devices for >> XGMI >> + * hive so that should take care of them too. 
>> */ >> - hive = amdgpu_get_xgmi_hive(adev, 0); >> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && >> - !mutex_trylock(&hive->reset_lock)) >> + >> + if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) { >> + DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as >> another already in progress", >> + job->base.id, hive->hive_id); >> return 0; >> + } >> /* Start with adev pre asic reset first for soft reset check.*/ >> - amdgpu_device_lock_adev(adev); >> + if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) { >> + DRM_INFO("Bailing on TDR for s_job:%llx, as another already >> in progress", >> + job->base.id); >> + return 0; >> + } >> + >> + /* Guilty job will be freed after this*/ >> r = amdgpu_device_pre_asic_reset(adev, >> job, >> - &need_full_reset); >> + &need_full_reset, >> + &job_signaled); >> if (r) { >> /*TODO Should we stop ?*/ >> DRM_ERROR("GPU pre asic reset failed with err, %d for drm >> dev, %s ", >> @@ -3609,10 +3646,11 @@ int amdgpu_device_gpu_recover(struct >> amdgpu_device *adev, >> if (tmp_adev == adev) >> continue; >> - amdgpu_device_lock_adev(tmp_adev); >> + amdgpu_device_lock_adev(tmp_adev, false); >> r = amdgpu_device_pre_asic_reset(tmp_adev, >> NULL, >> - &need_full_reset); >> + &need_full_reset, >> + &job_signaled); >> /*TODO Should we stop ?*/ >> if (r) { >> DRM_ERROR("GPU pre asic reset failed with err, %d for >> drm dev, %s ", >> @@ -3623,19 +3661,21 @@ int amdgpu_device_gpu_recover(struct >> amdgpu_device *adev, >> /* Actual ASIC resets if needed.*/ >> /* TODO Implement XGMI hive reset logic for SRIOV */ >> - if (amdgpu_sriov_vf(adev)) { >> - r = amdgpu_device_reset_sriov(adev, job ? false : true); >> - if (r) >> - adev->asic_reset_res = r; >> - } else { >> - r = amdgpu_do_asic_reset(hive, device_list_handle, >> &need_full_reset); >> - if (r && r == -EAGAIN) >> - goto retry; >> + if (!job || !job_signaled) { >> + if (amdgpu_sriov_vf(adev)) { >> + r = amdgpu_device_reset_sriov(adev, job ? 
false : true); >> + if (r) >> + adev->asic_reset_res = r; >> + } else { >> + r = amdgpu_do_asic_reset(hive, device_list_handle, >> &need_full_reset); >> + if (r && r == -EAGAIN) >> + goto retry; >> + } >> } >> /* Post ASIC reset for all devs .*/ >> list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { >> - amdgpu_device_post_asic_reset(tmp_adev); >> + amdgpu_device_post_asic_reset(tmp_adev, job_signaled); >> if (r) { >> /* bad news, how to tell it to userspace ? */ >> @@ -3648,7 +3688,7 @@ int amdgpu_device_gpu_recover(struct >> amdgpu_device *adev, >> amdgpu_device_unlock_adev(tmp_adev); >> } >> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1) >> + if (xgmi_topology_present) >> mutex_unlock(&hive->reset_lock); >> if (r) >
Am 09.04.19 um 18:42 schrieb Grodzovsky, Andrey: > On 4/9/19 10:50 AM, Christian König wrote: >> Am 08.04.19 um 18:08 schrieb Andrey Grodzovsky: >>> Also reject TDRs if another one already running. >>> >>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 94 >>> +++++++++++++++++++++--------- >>> 1 file changed, 67 insertions(+), 27 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> index aabd043..4446892 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> @@ -3327,10 +3327,12 @@ bool amdgpu_device_should_recover_gpu(struct >>> amdgpu_device *adev) >>> static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, >>> struct amdgpu_job *job, >>> - bool *need_full_reset_arg) >>> + bool *need_full_reset_arg, >>> + bool *job_signaled) >>> { >>> int i, r = 0; >>> bool need_full_reset = *need_full_reset_arg; >>> + struct amdgpu_ring *job_ring = job ? >>> to_amdgpu_ring(job->base.sched) : NULL; >>> /* block all schedulers and reset given job's ring */ >>> for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { >>> @@ -3341,6 +3343,17 @@ static int amdgpu_device_pre_asic_reset(struct >>> amdgpu_device *adev, >>> drm_sched_stop(&ring->sched, &job->base); >>> + /* >>> + * Must check guilty signal here since after this point all old >>> + * HW fences are force signaled. >>> + * >>> + * job->base holds a reference to parent fence >>> + */ >>> + if (job_signaled && job && ring == job_ring && >>> + job->base.s_fence->parent && >>> + dma_fence_is_signaled(job->base.s_fence->parent)) >>> + *job_signaled = true; >>> + >> That won't work correctly. See when the guilty job is not on the first >> scheduler, you would already have force completed some before getting >> here. >> >> Better to stop all schedulers first and then do the check. >> >> Christian. 
> > What do you mean by first scheduler ? There is one scheduler object per > ring so I am not clear what 'first' means here. Well for example if the guilty job is from a compute ring the we have already force signaled the gfx ring here. Same is true for other devices in the same hive, so it would probably be a good idea to move the force signaling and the IP reset somewhere else and this check up a layer. Christian. > > Andrey > > >>> /* after all hw jobs are reset, hw fence is meaningless, so >>> force_completion */ >>> amdgpu_fence_driver_force_completion(ring); >>> } >>> @@ -3358,7 +3371,8 @@ static int amdgpu_device_pre_asic_reset(struct >>> amdgpu_device *adev, >>> - if (!amdgpu_sriov_vf(adev)) { >>> + /* Don't suspend on bare metal if we are not going to HW reset >>> the ASIC */ >>> + if (!amdgpu_sriov_vf(adev) && !(*job_signaled)) { >>> if (!need_full_reset) >>> need_full_reset = amdgpu_device_ip_need_full_reset(adev); >>> @@ -3495,7 +3509,7 @@ static int amdgpu_do_asic_reset(struct >>> amdgpu_hive_info *hive, >>> return r; >>> } >>> -static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev) >>> +static void amdgpu_device_post_asic_reset(struct amdgpu_device >>> *adev, bool job_signaled) >>> { >>> int i; >>> @@ -3505,7 +3519,8 @@ static void >>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev) >>> if (!ring || !ring->sched.thread) >>> continue; >>> - if (!adev->asic_reset_res) >>> + /* No point to resubmit jobs if we didn't HW reset*/ >>> + if (!adev->asic_reset_res && !job_signaled) >>> drm_sched_resubmit_jobs(&ring->sched); >>> drm_sched_start(&ring->sched, !adev->asic_reset_res); >>> @@ -3518,14 +3533,21 @@ static void >>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev) >>> adev->asic_reset_res = 0; >>> } >>> -static void amdgpu_device_lock_adev(struct amdgpu_device *adev) >>> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool >>> trylock) >>> { >>> - mutex_lock(&adev->lock_reset); >>> + if (trylock) { 
>>> + if (!mutex_trylock(&adev->lock_reset)) >>> + return false; >>> + } else >>> + mutex_lock(&adev->lock_reset); >>> + >>> atomic_inc(&adev->gpu_reset_counter); >>> adev->in_gpu_reset = 1; >>> /* Block kfd: SRIOV would do it separately */ >>> if (!amdgpu_sriov_vf(adev)) >>> amdgpu_amdkfd_pre_reset(adev); >>> + >>> + return true; >>> } >>> static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) >>> @@ -3555,29 +3577,44 @@ int amdgpu_device_gpu_recover(struct >>> amdgpu_device *adev, >>> { >>> int r; >>> struct amdgpu_hive_info *hive = NULL; >>> - bool need_full_reset = false; >>> struct amdgpu_device *tmp_adev = NULL; >>> struct list_head device_list, *device_list_handle = NULL; >>> + bool xgmi_topology_present, need_full_reset, job_signaled; >>> + need_full_reset = job_signaled = false; >>> INIT_LIST_HEAD(&device_list); >>> dev_info(adev->dev, "GPU reset begin!\n"); >>> + hive = amdgpu_get_xgmi_hive(adev, 0); >>> + xgmi_topology_present = hive && >>> adev->gmc.xgmi.num_physical_nodes > 1; >>> + >>> /* >>> - * In case of XGMI hive disallow concurrent resets to be triggered >>> - * by different nodes. No point also since the one node already >>> executing >>> - * reset will also reset all the other nodes in the hive. >>> + * Here we trylock to avoid chain of resets executing from >>> + * either trigger by jobs on different adevs in XGMI hive or >>> jobs on >>> + * different schedulers for same device while this tdr is running. >>> + * We always reset all schedulers for device and all devices for >>> XGMI >>> + * hive so that should take care of them too. 
>>> */ >>> - hive = amdgpu_get_xgmi_hive(adev, 0); >>> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && >>> - !mutex_trylock(&hive->reset_lock)) >>> + >>> + if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) { >>> + DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as >>> another already in progress", >>> + job->base.id, hive->hive_id); >>> return 0; >>> + } >>> /* Start with adev pre asic reset first for soft reset check.*/ >>> - amdgpu_device_lock_adev(adev); >>> + if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) { >>> + DRM_INFO("Bailing on TDR for s_job:%llx, as another already >>> in progress", >>> + job->base.id); >>> + return 0; >>> + } >>> + >>> + /* Guilty job will be freed after this*/ >>> r = amdgpu_device_pre_asic_reset(adev, >>> job, >>> - &need_full_reset); >>> + &need_full_reset, >>> + &job_signaled); >>> if (r) { >>> /*TODO Should we stop ?*/ >>> DRM_ERROR("GPU pre asic reset failed with err, %d for drm >>> dev, %s ", >>> @@ -3609,10 +3646,11 @@ int amdgpu_device_gpu_recover(struct >>> amdgpu_device *adev, >>> if (tmp_adev == adev) >>> continue; >>> - amdgpu_device_lock_adev(tmp_adev); >>> + amdgpu_device_lock_adev(tmp_adev, false); >>> r = amdgpu_device_pre_asic_reset(tmp_adev, >>> NULL, >>> - &need_full_reset); >>> + &need_full_reset, >>> + &job_signaled); >>> /*TODO Should we stop ?*/ >>> if (r) { >>> DRM_ERROR("GPU pre asic reset failed with err, %d for >>> drm dev, %s ", >>> @@ -3623,19 +3661,21 @@ int amdgpu_device_gpu_recover(struct >>> amdgpu_device *adev, >>> /* Actual ASIC resets if needed.*/ >>> /* TODO Implement XGMI hive reset logic for SRIOV */ >>> - if (amdgpu_sriov_vf(adev)) { >>> - r = amdgpu_device_reset_sriov(adev, job ? 
false : true); >>> - if (r) >>> - adev->asic_reset_res = r; >>> - } else { >>> - r = amdgpu_do_asic_reset(hive, device_list_handle, >>> &need_full_reset); >>> - if (r && r == -EAGAIN) >>> - goto retry; >>> + if (!job || !job_signaled) { >>> + if (amdgpu_sriov_vf(adev)) { >>> + r = amdgpu_device_reset_sriov(adev, job ? false : true); >>> + if (r) >>> + adev->asic_reset_res = r; >>> + } else { >>> + r = amdgpu_do_asic_reset(hive, device_list_handle, >>> &need_full_reset); >>> + if (r && r == -EAGAIN) >>> + goto retry; >>> + } >>> } >>> /* Post ASIC reset for all devs .*/ >>> list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { >>> - amdgpu_device_post_asic_reset(tmp_adev); >>> + amdgpu_device_post_asic_reset(tmp_adev, job_signaled); >>> if (r) { >>> /* bad news, how to tell it to userspace ? */ >>> @@ -3648,7 +3688,7 @@ int amdgpu_device_gpu_recover(struct >>> amdgpu_device *adev, >>> amdgpu_device_unlock_adev(tmp_adev); >>> } >>> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1) >>> + if (xgmi_topology_present) >>> mutex_unlock(&hive->reset_lock); >>> if (r) > _______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
On 4/10/19 10:06 AM, Christian König wrote: > Am 09.04.19 um 18:42 schrieb Grodzovsky, Andrey: >> On 4/9/19 10:50 AM, Christian König wrote: >>> Am 08.04.19 um 18:08 schrieb Andrey Grodzovsky: >>>> Also reject TDRs if another one already running. >>>> >>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>> --- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 94 >>>> +++++++++++++++++++++--------- >>>> 1 file changed, 67 insertions(+), 27 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>> index aabd043..4446892 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>> @@ -3327,10 +3327,12 @@ bool amdgpu_device_should_recover_gpu(struct >>>> amdgpu_device *adev) >>>> static int amdgpu_device_pre_asic_reset(struct amdgpu_device >>>> *adev, >>>> struct amdgpu_job *job, >>>> - bool *need_full_reset_arg) >>>> + bool *need_full_reset_arg, >>>> + bool *job_signaled) >>>> { >>>> int i, r = 0; >>>> bool need_full_reset = *need_full_reset_arg; >>>> + struct amdgpu_ring *job_ring = job ? >>>> to_amdgpu_ring(job->base.sched) : NULL; >>>> /* block all schedulers and reset given job's ring */ >>>> for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { >>>> @@ -3341,6 +3343,17 @@ static int amdgpu_device_pre_asic_reset(struct >>>> amdgpu_device *adev, >>>> drm_sched_stop(&ring->sched, &job->base); >>>> + /* >>>> + * Must check guilty signal here since after this point >>>> all old >>>> + * HW fences are force signaled. >>>> + * >>>> + * job->base holds a reference to parent fence >>>> + */ >>>> + if (job_signaled && job && ring == job_ring && >>>> + job->base.s_fence->parent && >>>> + dma_fence_is_signaled(job->base.s_fence->parent)) >>>> + *job_signaled = true; >>>> + >>> That won't work correctly. See when the guilty job is not on the first >>> scheduler, you would already have force completed some before getting >>> here. 
>>> >>> Better to stop all schedulers first and then do the check. >>> >>> Christian. >> >> What do you mean by first scheduler ? There is one scheduler object per >> ring so I am not clear what 'first' means here. > > Well for example if the guilty job is from a compute ring the we have > already force signaled the gfx ring here. > > Same is true for other devices in the same hive, so it would probably > be a good idea to move the force signaling and the IP reset somewhere > else and this check up a layer. > > Christian. Let me see if I understand you correctly - you want to AVOID ANY force signaling in case we are not going to HW reset and so you want to have the check if guilty is signaled BEFORE any ring fences are force signaled. Correct ? Andrey > >> >> Andrey >> >> >>>> /* after all hw jobs are reset, hw fence is meaningless, so >>>> force_completion */ >>>> amdgpu_fence_driver_force_completion(ring); >>>> } >>>> @@ -3358,7 +3371,8 @@ static int amdgpu_device_pre_asic_reset(struct >>>> amdgpu_device *adev, >>>> - if (!amdgpu_sriov_vf(adev)) { >>>> + /* Don't suspend on bare metal if we are not going to HW reset >>>> the ASIC */ >>>> + if (!amdgpu_sriov_vf(adev) && !(*job_signaled)) { >>>> if (!need_full_reset) >>>> need_full_reset = >>>> amdgpu_device_ip_need_full_reset(adev); >>>> @@ -3495,7 +3509,7 @@ static int amdgpu_do_asic_reset(struct >>>> amdgpu_hive_info *hive, >>>> return r; >>>> } >>>> -static void amdgpu_device_post_asic_reset(struct amdgpu_device >>>> *adev) >>>> +static void amdgpu_device_post_asic_reset(struct amdgpu_device >>>> *adev, bool job_signaled) >>>> { >>>> int i; >>>> @@ -3505,7 +3519,8 @@ static void >>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev) >>>> if (!ring || !ring->sched.thread) >>>> continue; >>>> - if (!adev->asic_reset_res) >>>> + /* No point to resubmit jobs if we didn't HW reset*/ >>>> + if (!adev->asic_reset_res && !job_signaled) >>>> drm_sched_resubmit_jobs(&ring->sched); >>>> 
drm_sched_start(&ring->sched, !adev->asic_reset_res); >>>> @@ -3518,14 +3533,21 @@ static void >>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev) >>>> adev->asic_reset_res = 0; >>>> } >>>> -static void amdgpu_device_lock_adev(struct amdgpu_device *adev) >>>> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool >>>> trylock) >>>> { >>>> - mutex_lock(&adev->lock_reset); >>>> + if (trylock) { >>>> + if (!mutex_trylock(&adev->lock_reset)) >>>> + return false; >>>> + } else >>>> + mutex_lock(&adev->lock_reset); >>>> + >>>> atomic_inc(&adev->gpu_reset_counter); >>>> adev->in_gpu_reset = 1; >>>> /* Block kfd: SRIOV would do it separately */ >>>> if (!amdgpu_sriov_vf(adev)) >>>> amdgpu_amdkfd_pre_reset(adev); >>>> + >>>> + return true; >>>> } >>>> static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) >>>> @@ -3555,29 +3577,44 @@ int amdgpu_device_gpu_recover(struct >>>> amdgpu_device *adev, >>>> { >>>> int r; >>>> struct amdgpu_hive_info *hive = NULL; >>>> - bool need_full_reset = false; >>>> struct amdgpu_device *tmp_adev = NULL; >>>> struct list_head device_list, *device_list_handle = NULL; >>>> + bool xgmi_topology_present, need_full_reset, job_signaled; >>>> + need_full_reset = job_signaled = false; >>>> INIT_LIST_HEAD(&device_list); >>>> dev_info(adev->dev, "GPU reset begin!\n"); >>>> + hive = amdgpu_get_xgmi_hive(adev, 0); >>>> + xgmi_topology_present = hive && >>>> adev->gmc.xgmi.num_physical_nodes > 1; >>>> + >>>> /* >>>> - * In case of XGMI hive disallow concurrent resets to be >>>> triggered >>>> - * by different nodes. No point also since the one node already >>>> executing >>>> - * reset will also reset all the other nodes in the hive. >>>> + * Here we trylock to avoid chain of resets executing from >>>> + * either trigger by jobs on different adevs in XGMI hive or >>>> jobs on >>>> + * different schedulers for same device while this tdr is >>>> running. 
>>>> + * We always reset all schedulers for device and all devices for >>>> XGMI >>>> + * hive so that should take care of them too. >>>> */ >>>> - hive = amdgpu_get_xgmi_hive(adev, 0); >>>> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && >>>> - !mutex_trylock(&hive->reset_lock)) >>>> + >>>> + if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) { >>>> + DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as >>>> another already in progress", >>>> + job->base.id, hive->hive_id); >>>> return 0; >>>> + } >>>> /* Start with adev pre asic reset first for soft reset >>>> check.*/ >>>> - amdgpu_device_lock_adev(adev); >>>> + if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) { >>>> + DRM_INFO("Bailing on TDR for s_job:%llx, as another already >>>> in progress", >>>> + job->base.id); >>>> + return 0; >>>> + } >>>> + >>>> + /* Guilty job will be freed after this*/ >>>> r = amdgpu_device_pre_asic_reset(adev, >>>> job, >>>> - &need_full_reset); >>>> + &need_full_reset, >>>> + &job_signaled); >>>> if (r) { >>>> /*TODO Should we stop ?*/ >>>> DRM_ERROR("GPU pre asic reset failed with err, %d for drm >>>> dev, %s ", >>>> @@ -3609,10 +3646,11 @@ int amdgpu_device_gpu_recover(struct >>>> amdgpu_device *adev, >>>> if (tmp_adev == adev) >>>> continue; >>>> - amdgpu_device_lock_adev(tmp_adev); >>>> + amdgpu_device_lock_adev(tmp_adev, false); >>>> r = amdgpu_device_pre_asic_reset(tmp_adev, >>>> NULL, >>>> - &need_full_reset); >>>> + &need_full_reset, >>>> + &job_signaled); >>>> /*TODO Should we stop ?*/ >>>> if (r) { >>>> DRM_ERROR("GPU pre asic reset failed with err, %d for >>>> drm dev, %s ", >>>> @@ -3623,19 +3661,21 @@ int amdgpu_device_gpu_recover(struct >>>> amdgpu_device *adev, >>>> /* Actual ASIC resets if needed.*/ >>>> /* TODO Implement XGMI hive reset logic for SRIOV */ >>>> - if (amdgpu_sriov_vf(adev)) { >>>> - r = amdgpu_device_reset_sriov(adev, job ? 
false : true); >>>> - if (r) >>>> - adev->asic_reset_res = r; >>>> - } else { >>>> - r = amdgpu_do_asic_reset(hive, device_list_handle, >>>> &need_full_reset); >>>> - if (r && r == -EAGAIN) >>>> - goto retry; >>>> + if (!job || !job_signaled) { >>>> + if (amdgpu_sriov_vf(adev)) { >>>> + r = amdgpu_device_reset_sriov(adev, job ? false : true); >>>> + if (r) >>>> + adev->asic_reset_res = r; >>>> + } else { >>>> + r = amdgpu_do_asic_reset(hive, device_list_handle, >>>> &need_full_reset); >>>> + if (r && r == -EAGAIN) >>>> + goto retry; >>>> + } >>>> } >>>> /* Post ASIC reset for all devs .*/ >>>> list_for_each_entry(tmp_adev, device_list_handle, >>>> gmc.xgmi.head) { >>>> - amdgpu_device_post_asic_reset(tmp_adev); >>>> + amdgpu_device_post_asic_reset(tmp_adev, job_signaled); >>>> if (r) { >>>> /* bad news, how to tell it to userspace ? */ >>>> @@ -3648,7 +3688,7 @@ int amdgpu_device_gpu_recover(struct >>>> amdgpu_device *adev, >>>> amdgpu_device_unlock_adev(tmp_adev); >>>> } >>>> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1) >>>> + if (xgmi_topology_present) >>>> mutex_unlock(&hive->reset_lock); >>>> if (r) >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx >
Am 10.04.19 um 16:28 schrieb Grodzovsky, Andrey: > On 4/10/19 10:06 AM, Christian König wrote: >> Am 09.04.19 um 18:42 schrieb Grodzovsky, Andrey: >>> On 4/9/19 10:50 AM, Christian König wrote: >>>> Am 08.04.19 um 18:08 schrieb Andrey Grodzovsky: >>>>> Also reject TDRs if another one already running. >>>>> >>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>>> --- >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 94 >>>>> +++++++++++++++++++++--------- >>>>> 1 file changed, 67 insertions(+), 27 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>> index aabd043..4446892 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>> @@ -3327,10 +3327,12 @@ bool amdgpu_device_should_recover_gpu(struct >>>>> amdgpu_device *adev) >>>>> static int amdgpu_device_pre_asic_reset(struct amdgpu_device >>>>> *adev, >>>>> struct amdgpu_job *job, >>>>> - bool *need_full_reset_arg) >>>>> + bool *need_full_reset_arg, >>>>> + bool *job_signaled) >>>>> { >>>>> int i, r = 0; >>>>> bool need_full_reset = *need_full_reset_arg; >>>>> + struct amdgpu_ring *job_ring = job ? >>>>> to_amdgpu_ring(job->base.sched) : NULL; >>>>> /* block all schedulers and reset given job's ring */ >>>>> for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { >>>>> @@ -3341,6 +3343,17 @@ static int amdgpu_device_pre_asic_reset(struct >>>>> amdgpu_device *adev, >>>>> drm_sched_stop(&ring->sched, &job->base); >>>>> + /* >>>>> + * Must check guilty signal here since after this point >>>>> all old >>>>> + * HW fences are force signaled. >>>>> + * >>>>> + * job->base holds a reference to parent fence >>>>> + */ >>>>> + if (job_signaled && job && ring == job_ring && >>>>> + job->base.s_fence->parent && >>>>> + dma_fence_is_signaled(job->base.s_fence->parent)) >>>>> + *job_signaled = true; >>>>> + >>>> That won't work correctly. 
See when the guilty job is not on the first >>>> scheduler, you would already have force completed some before getting >>>> here. >>>> >>>> Better to stop all schedulers first and then do the check. >>>> >>>> Christian. >>> What do you mean by first scheduler ? There is one scheduler object per >>> ring so I am not clear what 'first' means here. >> Well for example if the guilty job is from a compute ring the we have >> already force signaled the gfx ring here. >> >> Same is true for other devices in the same hive, so it would probably >> be a good idea to move the force signaling and the IP reset somewhere >> else and this check up a layer. >> >> Christian. > > Let me see if I understand you correctly - you want to AVOID ANY force > signaling in case we are not going to HW reset and so you want to have > the check if guilty is signaled BEFORE any ring fences are force > signaled. Correct ? Correct. Basically we should do the following: 1. Stop all schedulers to make sure that nothing is going on. 2. Check the guilty job once more to make sure that it hasn't signaled in the meantime. 3. Start our reset procedure, with force complete, soft reset eventually hard reset etc etc.. 4. Resubmit all not yet completed jobs. 5. Start the schedulers again. Christian. 
> > Andrey > >>> Andrey >>> >>> >>>>> /* after all hw jobs are reset, hw fence is meaningless, so >>>>> force_completion */ >>>>> amdgpu_fence_driver_force_completion(ring); >>>>> } >>>>> @@ -3358,7 +3371,8 @@ static int amdgpu_device_pre_asic_reset(struct >>>>> amdgpu_device *adev, >>>>> - if (!amdgpu_sriov_vf(adev)) { >>>>> + /* Don't suspend on bare metal if we are not going to HW reset >>>>> the ASIC */ >>>>> + if (!amdgpu_sriov_vf(adev) && !(*job_signaled)) { >>>>> if (!need_full_reset) >>>>> need_full_reset = >>>>> amdgpu_device_ip_need_full_reset(adev); >>>>> @@ -3495,7 +3509,7 @@ static int amdgpu_do_asic_reset(struct >>>>> amdgpu_hive_info *hive, >>>>> return r; >>>>> } >>>>> -static void amdgpu_device_post_asic_reset(struct amdgpu_device >>>>> *adev) >>>>> +static void amdgpu_device_post_asic_reset(struct amdgpu_device >>>>> *adev, bool job_signaled) >>>>> { >>>>> int i; >>>>> @@ -3505,7 +3519,8 @@ static void >>>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev) >>>>> if (!ring || !ring->sched.thread) >>>>> continue; >>>>> - if (!adev->asic_reset_res) >>>>> + /* No point to resubmit jobs if we didn't HW reset*/ >>>>> + if (!adev->asic_reset_res && !job_signaled) >>>>> drm_sched_resubmit_jobs(&ring->sched); >>>>> drm_sched_start(&ring->sched, !adev->asic_reset_res); >>>>> @@ -3518,14 +3533,21 @@ static void >>>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev) >>>>> adev->asic_reset_res = 0; >>>>> } >>>>> -static void amdgpu_device_lock_adev(struct amdgpu_device *adev) >>>>> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool >>>>> trylock) >>>>> { >>>>> - mutex_lock(&adev->lock_reset); >>>>> + if (trylock) { >>>>> + if (!mutex_trylock(&adev->lock_reset)) >>>>> + return false; >>>>> + } else >>>>> + mutex_lock(&adev->lock_reset); >>>>> + >>>>> atomic_inc(&adev->gpu_reset_counter); >>>>> adev->in_gpu_reset = 1; >>>>> /* Block kfd: SRIOV would do it separately */ >>>>> if (!amdgpu_sriov_vf(adev)) >>>>> 
amdgpu_amdkfd_pre_reset(adev); >>>>> + >>>>> + return true; >>>>> } >>>>> static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) >>>>> @@ -3555,29 +3577,44 @@ int amdgpu_device_gpu_recover(struct >>>>> amdgpu_device *adev, >>>>> { >>>>> int r; >>>>> struct amdgpu_hive_info *hive = NULL; >>>>> - bool need_full_reset = false; >>>>> struct amdgpu_device *tmp_adev = NULL; >>>>> struct list_head device_list, *device_list_handle = NULL; >>>>> + bool xgmi_topology_present, need_full_reset, job_signaled; >>>>> + need_full_reset = job_signaled = false; >>>>> INIT_LIST_HEAD(&device_list); >>>>> dev_info(adev->dev, "GPU reset begin!\n"); >>>>> + hive = amdgpu_get_xgmi_hive(adev, 0); >>>>> + xgmi_topology_present = hive && >>>>> adev->gmc.xgmi.num_physical_nodes > 1; >>>>> + >>>>> /* >>>>> - * In case of XGMI hive disallow concurrent resets to be >>>>> triggered >>>>> - * by different nodes. No point also since the one node already >>>>> executing >>>>> - * reset will also reset all the other nodes in the hive. >>>>> + * Here we trylock to avoid chain of resets executing from >>>>> + * either trigger by jobs on different adevs in XGMI hive or >>>>> jobs on >>>>> + * different schedulers for same device while this tdr is >>>>> running. >>>>> + * We always reset all schedulers for device and all devices for >>>>> XGMI >>>>> + * hive so that should take care of them too. 
>>>>> */ >>>>> - hive = amdgpu_get_xgmi_hive(adev, 0); >>>>> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && >>>>> - !mutex_trylock(&hive->reset_lock)) >>>>> + >>>>> + if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) { >>>>> + DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as >>>>> another already in progress", >>>>> + job->base.id, hive->hive_id); >>>>> return 0; >>>>> + } >>>>> /* Start with adev pre asic reset first for soft reset >>>>> check.*/ >>>>> - amdgpu_device_lock_adev(adev); >>>>> + if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) { >>>>> + DRM_INFO("Bailing on TDR for s_job:%llx, as another already >>>>> in progress", >>>>> + job->base.id); >>>>> + return 0; >>>>> + } >>>>> + >>>>> + /* Guilty job will be freed after this*/ >>>>> r = amdgpu_device_pre_asic_reset(adev, >>>>> job, >>>>> - &need_full_reset); >>>>> + &need_full_reset, >>>>> + &job_signaled); >>>>> if (r) { >>>>> /*TODO Should we stop ?*/ >>>>> DRM_ERROR("GPU pre asic reset failed with err, %d for drm >>>>> dev, %s ", >>>>> @@ -3609,10 +3646,11 @@ int amdgpu_device_gpu_recover(struct >>>>> amdgpu_device *adev, >>>>> if (tmp_adev == adev) >>>>> continue; >>>>> - amdgpu_device_lock_adev(tmp_adev); >>>>> + amdgpu_device_lock_adev(tmp_adev, false); >>>>> r = amdgpu_device_pre_asic_reset(tmp_adev, >>>>> NULL, >>>>> - &need_full_reset); >>>>> + &need_full_reset, >>>>> + &job_signaled); >>>>> /*TODO Should we stop ?*/ >>>>> if (r) { >>>>> DRM_ERROR("GPU pre asic reset failed with err, %d for >>>>> drm dev, %s ", >>>>> @@ -3623,19 +3661,21 @@ int amdgpu_device_gpu_recover(struct >>>>> amdgpu_device *adev, >>>>> /* Actual ASIC resets if needed.*/ >>>>> /* TODO Implement XGMI hive reset logic for SRIOV */ >>>>> - if (amdgpu_sriov_vf(adev)) { >>>>> - r = amdgpu_device_reset_sriov(adev, job ? 
false : true); >>>>> - if (r) >>>>> - adev->asic_reset_res = r; >>>>> - } else { >>>>> - r = amdgpu_do_asic_reset(hive, device_list_handle, >>>>> &need_full_reset); >>>>> - if (r && r == -EAGAIN) >>>>> - goto retry; >>>>> + if (!job || !job_signaled) { >>>>> + if (amdgpu_sriov_vf(adev)) { >>>>> + r = amdgpu_device_reset_sriov(adev, job ? false : true); >>>>> + if (r) >>>>> + adev->asic_reset_res = r; >>>>> + } else { >>>>> + r = amdgpu_do_asic_reset(hive, device_list_handle, >>>>> &need_full_reset); >>>>> + if (r && r == -EAGAIN) >>>>> + goto retry; >>>>> + } >>>>> } >>>>> /* Post ASIC reset for all devs .*/ >>>>> list_for_each_entry(tmp_adev, device_list_handle, >>>>> gmc.xgmi.head) { >>>>> - amdgpu_device_post_asic_reset(tmp_adev); >>>>> + amdgpu_device_post_asic_reset(tmp_adev, job_signaled); >>>>> if (r) { >>>>> /* bad news, how to tell it to userspace ? */ >>>>> @@ -3648,7 +3688,7 @@ int amdgpu_device_gpu_recover(struct >>>>> amdgpu_device *adev, >>>>> amdgpu_device_unlock_adev(tmp_adev); >>>>> } >>>>> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1) >>>>> + if (xgmi_topology_present) >>>>> mutex_unlock(&hive->reset_lock); >>>>> if (r) >>> _______________________________________________ >>> amd-gfx mailing list >>> amd-gfx@lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx > _______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
On 4/10/19 10:41 AM, Christian König wrote: > Am 10.04.19 um 16:28 schrieb Grodzovsky, Andrey: >> On 4/10/19 10:06 AM, Christian König wrote: >>> Am 09.04.19 um 18:42 schrieb Grodzovsky, Andrey: >>>> On 4/9/19 10:50 AM, Christian König wrote: >>>>> Am 08.04.19 um 18:08 schrieb Andrey Grodzovsky: >>>>>> Also reject TDRs if another one already running. >>>>>> >>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>>>> --- >>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 94 >>>>>> +++++++++++++++++++++--------- >>>>>> 1 file changed, 67 insertions(+), 27 deletions(-) >>>>>> >>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>>> index aabd043..4446892 100644 >>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>>> @@ -3327,10 +3327,12 @@ bool amdgpu_device_should_recover_gpu(struct >>>>>> amdgpu_device *adev) >>>>>> static int amdgpu_device_pre_asic_reset(struct amdgpu_device >>>>>> *adev, >>>>>> struct amdgpu_job *job, >>>>>> - bool *need_full_reset_arg) >>>>>> + bool *need_full_reset_arg, >>>>>> + bool *job_signaled) >>>>>> { >>>>>> int i, r = 0; >>>>>> bool need_full_reset = *need_full_reset_arg; >>>>>> + struct amdgpu_ring *job_ring = job ? >>>>>> to_amdgpu_ring(job->base.sched) : NULL; >>>>>> /* block all schedulers and reset given job's ring */ >>>>>> for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { >>>>>> @@ -3341,6 +3343,17 @@ static int >>>>>> amdgpu_device_pre_asic_reset(struct >>>>>> amdgpu_device *adev, >>>>>> drm_sched_stop(&ring->sched, &job->base); >>>>>> + /* >>>>>> + * Must check guilty signal here since after this point >>>>>> all old >>>>>> + * HW fences are force signaled. 
>>>>>> + * >>>>>> + * job->base holds a reference to parent fence >>>>>> + */ >>>>>> + if (job_signaled && job && ring == job_ring && >>>>>> + job->base.s_fence->parent && >>>>>> + dma_fence_is_signaled(job->base.s_fence->parent)) >>>>>> + *job_signaled = true; >>>>>> + >>>>> That won't work correctly. See when the guilty job is not on the >>>>> first >>>>> scheduler, you would already have force completed some before getting >>>>> here. >>>>> >>>>> Better to stop all schedulers first and then do the check. >>>>> >>>>> Christian. >>>> What do you mean by first scheduler ? There is one scheduler object >>>> per >>>> ring so I am not clear what 'first' means here. >>> Well for example if the guilty job is from a compute ring the we have >>> already force signaled the gfx ring here. >>> >>> Same is true for other devices in the same hive, so it would probably >>> be a good idea to move the force signaling and the IP reset somewhere >>> else and this check up a layer. >>> >>> Christian. >> >> Let me see if I understand you correctly - you want to AVOID ANY force >> signaling in case we are not going to HW reset and so you want to have >> the check if guilty is signaled BEFORE any ring fences are force >> signaled. Correct ? > > Correct. > > Basically we should do the following: > 1. Stop all schedulers to make sure that nothing is going on. > 2. Check the guilty job once more to make sure that it hasn't signaled > in the meantime. > 3. Start our reset procedure, with force complete, soft reset > eventually hard reset etc etc.. > 4. Resubmit all not yet completed jobs. > 5. Start the schedulers again. > > Christian. Why not just always ensure the guilty job's ring is always checked first and then do the rest of the rings - inside amdgpu_device_pre_asic_reset. Seems to me like a much smaller change with less impact to current code structure. 
Andrey > >> >> Andrey >> >>>> Andrey >>>> >>>> >>>>>> /* after all hw jobs are reset, hw fence is >>>>>> meaningless, so >>>>>> force_completion */ >>>>>> amdgpu_fence_driver_force_completion(ring); >>>>>> } >>>>>> @@ -3358,7 +3371,8 @@ static int amdgpu_device_pre_asic_reset(struct >>>>>> amdgpu_device *adev, >>>>>> - if (!amdgpu_sriov_vf(adev)) { >>>>>> + /* Don't suspend on bare metal if we are not going to HW reset >>>>>> the ASIC */ >>>>>> + if (!amdgpu_sriov_vf(adev) && !(*job_signaled)) { >>>>>> if (!need_full_reset) >>>>>> need_full_reset = >>>>>> amdgpu_device_ip_need_full_reset(adev); >>>>>> @@ -3495,7 +3509,7 @@ static int amdgpu_do_asic_reset(struct >>>>>> amdgpu_hive_info *hive, >>>>>> return r; >>>>>> } >>>>>> -static void amdgpu_device_post_asic_reset(struct amdgpu_device >>>>>> *adev) >>>>>> +static void amdgpu_device_post_asic_reset(struct amdgpu_device >>>>>> *adev, bool job_signaled) >>>>>> { >>>>>> int i; >>>>>> @@ -3505,7 +3519,8 @@ static void >>>>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev) >>>>>> if (!ring || !ring->sched.thread) >>>>>> continue; >>>>>> - if (!adev->asic_reset_res) >>>>>> + /* No point to resubmit jobs if we didn't HW reset*/ >>>>>> + if (!adev->asic_reset_res && !job_signaled) >>>>>> drm_sched_resubmit_jobs(&ring->sched); >>>>>> drm_sched_start(&ring->sched, !adev->asic_reset_res); >>>>>> @@ -3518,14 +3533,21 @@ static void >>>>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev) >>>>>> adev->asic_reset_res = 0; >>>>>> } >>>>>> -static void amdgpu_device_lock_adev(struct amdgpu_device *adev) >>>>>> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, >>>>>> bool >>>>>> trylock) >>>>>> { >>>>>> - mutex_lock(&adev->lock_reset); >>>>>> + if (trylock) { >>>>>> + if (!mutex_trylock(&adev->lock_reset)) >>>>>> + return false; >>>>>> + } else >>>>>> + mutex_lock(&adev->lock_reset); >>>>>> + >>>>>> atomic_inc(&adev->gpu_reset_counter); >>>>>> adev->in_gpu_reset = 1; >>>>>> /* Block kfd: SRIOV 
would do it separately */ >>>>>> if (!amdgpu_sriov_vf(adev)) >>>>>> amdgpu_amdkfd_pre_reset(adev); >>>>>> + >>>>>> + return true; >>>>>> } >>>>>> static void amdgpu_device_unlock_adev(struct amdgpu_device >>>>>> *adev) >>>>>> @@ -3555,29 +3577,44 @@ int amdgpu_device_gpu_recover(struct >>>>>> amdgpu_device *adev, >>>>>> { >>>>>> int r; >>>>>> struct amdgpu_hive_info *hive = NULL; >>>>>> - bool need_full_reset = false; >>>>>> struct amdgpu_device *tmp_adev = NULL; >>>>>> struct list_head device_list, *device_list_handle = NULL; >>>>>> + bool xgmi_topology_present, need_full_reset, job_signaled; >>>>>> + need_full_reset = job_signaled = false; >>>>>> INIT_LIST_HEAD(&device_list); >>>>>> dev_info(adev->dev, "GPU reset begin!\n"); >>>>>> + hive = amdgpu_get_xgmi_hive(adev, 0); >>>>>> + xgmi_topology_present = hive && >>>>>> adev->gmc.xgmi.num_physical_nodes > 1; >>>>>> + >>>>>> /* >>>>>> - * In case of XGMI hive disallow concurrent resets to be >>>>>> triggered >>>>>> - * by different nodes. No point also since the one node already >>>>>> executing >>>>>> - * reset will also reset all the other nodes in the hive. >>>>>> + * Here we trylock to avoid chain of resets executing from >>>>>> + * either trigger by jobs on different adevs in XGMI hive or >>>>>> jobs on >>>>>> + * different schedulers for same device while this tdr is >>>>>> running. >>>>>> + * We always reset all schedulers for device and all devices >>>>>> for >>>>>> XGMI >>>>>> + * hive so that should take care of them too. 
>>>>>> */ >>>>>> - hive = amdgpu_get_xgmi_hive(adev, 0); >>>>>> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && >>>>>> - !mutex_trylock(&hive->reset_lock)) >>>>>> + >>>>>> + if (xgmi_topology_present && >>>>>> !mutex_trylock(&hive->reset_lock)) { >>>>>> + DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as >>>>>> another already in progress", >>>>>> + job->base.id, hive->hive_id); >>>>>> return 0; >>>>>> + } >>>>>> /* Start with adev pre asic reset first for soft reset >>>>>> check.*/ >>>>>> - amdgpu_device_lock_adev(adev); >>>>>> + if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) { >>>>>> + DRM_INFO("Bailing on TDR for s_job:%llx, as another already >>>>>> in progress", >>>>>> + job->base.id); >>>>>> + return 0; >>>>>> + } >>>>>> + >>>>>> + /* Guilty job will be freed after this*/ >>>>>> r = amdgpu_device_pre_asic_reset(adev, >>>>>> job, >>>>>> - &need_full_reset); >>>>>> + &need_full_reset, >>>>>> + &job_signaled); >>>>>> if (r) { >>>>>> /*TODO Should we stop ?*/ >>>>>> DRM_ERROR("GPU pre asic reset failed with err, %d for >>>>>> drm >>>>>> dev, %s ", >>>>>> @@ -3609,10 +3646,11 @@ int amdgpu_device_gpu_recover(struct >>>>>> amdgpu_device *adev, >>>>>> if (tmp_adev == adev) >>>>>> continue; >>>>>> - amdgpu_device_lock_adev(tmp_adev); >>>>>> + amdgpu_device_lock_adev(tmp_adev, false); >>>>>> r = amdgpu_device_pre_asic_reset(tmp_adev, >>>>>> NULL, >>>>>> - &need_full_reset); >>>>>> + &need_full_reset, >>>>>> + &job_signaled); >>>>>> /*TODO Should we stop ?*/ >>>>>> if (r) { >>>>>> DRM_ERROR("GPU pre asic reset failed with err, %d >>>>>> for >>>>>> drm dev, %s ", >>>>>> @@ -3623,19 +3661,21 @@ int amdgpu_device_gpu_recover(struct >>>>>> amdgpu_device *adev, >>>>>> /* Actual ASIC resets if needed.*/ >>>>>> /* TODO Implement XGMI hive reset logic for SRIOV */ >>>>>> - if (amdgpu_sriov_vf(adev)) { >>>>>> - r = amdgpu_device_reset_sriov(adev, job ? 
false : true); >>>>>> - if (r) >>>>>> - adev->asic_reset_res = r; >>>>>> - } else { >>>>>> - r = amdgpu_do_asic_reset(hive, device_list_handle, >>>>>> &need_full_reset); >>>>>> - if (r && r == -EAGAIN) >>>>>> - goto retry; >>>>>> + if (!job || !job_signaled) { >>>>>> + if (amdgpu_sriov_vf(adev)) { >>>>>> + r = amdgpu_device_reset_sriov(adev, job ? false : >>>>>> true); >>>>>> + if (r) >>>>>> + adev->asic_reset_res = r; >>>>>> + } else { >>>>>> + r = amdgpu_do_asic_reset(hive, device_list_handle, >>>>>> &need_full_reset); >>>>>> + if (r && r == -EAGAIN) >>>>>> + goto retry; >>>>>> + } >>>>>> } >>>>>> /* Post ASIC reset for all devs .*/ >>>>>> list_for_each_entry(tmp_adev, device_list_handle, >>>>>> gmc.xgmi.head) { >>>>>> - amdgpu_device_post_asic_reset(tmp_adev); >>>>>> + amdgpu_device_post_asic_reset(tmp_adev, job_signaled); >>>>>> if (r) { >>>>>> /* bad news, how to tell it to userspace ? */ >>>>>> @@ -3648,7 +3688,7 @@ int amdgpu_device_gpu_recover(struct >>>>>> amdgpu_device *adev, >>>>>> amdgpu_device_unlock_adev(tmp_adev); >>>>>> } >>>>>> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1) >>>>>> + if (xgmi_topology_present) >>>>>> mutex_unlock(&hive->reset_lock); >>>>>> if (r) >>>> _______________________________________________ >>>> amd-gfx mailing list >>>> amd-gfx@lists.freedesktop.org >>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx >
Am 10.04.19 um 17:05 schrieb Grodzovsky, Andrey: > On 4/10/19 10:41 AM, Christian König wrote: >> Am 10.04.19 um 16:28 schrieb Grodzovsky, Andrey: >>> On 4/10/19 10:06 AM, Christian König wrote: >>>> Am 09.04.19 um 18:42 schrieb Grodzovsky, Andrey: >>>>> On 4/9/19 10:50 AM, Christian König wrote: >>>>>> Am 08.04.19 um 18:08 schrieb Andrey Grodzovsky: >>>>>>> Also reject TDRs if another one already running. >>>>>>> >>>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>>>>> --- >>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 94 >>>>>>> +++++++++++++++++++++--------- >>>>>>> 1 file changed, 67 insertions(+), 27 deletions(-) >>>>>>> >>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>>>> index aabd043..4446892 100644 >>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>>>>>> @@ -3327,10 +3327,12 @@ bool amdgpu_device_should_recover_gpu(struct >>>>>>> amdgpu_device *adev) >>>>>>> static int amdgpu_device_pre_asic_reset(struct amdgpu_device >>>>>>> *adev, >>>>>>> struct amdgpu_job *job, >>>>>>> - bool *need_full_reset_arg) >>>>>>> + bool *need_full_reset_arg, >>>>>>> + bool *job_signaled) >>>>>>> { >>>>>>> int i, r = 0; >>>>>>> bool need_full_reset = *need_full_reset_arg; >>>>>>> + struct amdgpu_ring *job_ring = job ? >>>>>>> to_amdgpu_ring(job->base.sched) : NULL; >>>>>>> /* block all schedulers and reset given job's ring */ >>>>>>> for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { >>>>>>> @@ -3341,6 +3343,17 @@ static int >>>>>>> amdgpu_device_pre_asic_reset(struct >>>>>>> amdgpu_device *adev, >>>>>>> drm_sched_stop(&ring->sched, &job->base); >>>>>>> + /* >>>>>>> + * Must check guilty signal here since after this point >>>>>>> all old >>>>>>> + * HW fences are force signaled. 
>>>>>>> + * >>>>>>> + * job->base holds a reference to parent fence >>>>>>> + */ >>>>>>> + if (job_signaled && job && ring == job_ring && >>>>>>> + job->base.s_fence->parent && >>>>>>> + dma_fence_is_signaled(job->base.s_fence->parent)) >>>>>>> + *job_signaled = true; >>>>>>> + >>>>>> That won't work correctly. See when the guilty job is not on the >>>>>> first >>>>>> scheduler, you would already have force completed some before getting >>>>>> here. >>>>>> >>>>>> Better to stop all schedulers first and then do the check. >>>>>> >>>>>> Christian. >>>>> What do you mean by first scheduler ? There is one scheduler object >>>>> per >>>>> ring so I am not clear what 'first' means here. >>>> Well for example if the guilty job is from a compute ring the we have >>>> already force signaled the gfx ring here. >>>> >>>> Same is true for other devices in the same hive, so it would probably >>>> be a good idea to move the force signaling and the IP reset somewhere >>>> else and this check up a layer. >>>> >>>> Christian. >>> Let me see if I understand you correctly - you want to AVOID ANY force >>> signaling in case we are not going to HW reset and so you want to have >>> the check if guilty is signaled BEFORE any ring fences are force >>> signaled. Correct ? >> Correct. >> >> Basically we should do the following: >> 1. Stop all schedulers to make sure that nothing is going on. >> 2. Check the guilty job once more to make sure that it hasn't signaled >> in the meantime. >> 3. Start our reset procedure, with force complete, soft reset >> eventually hard reset etc etc.. >> 4. Resubmit all not yet completed jobs. >> 5. Start the schedulers again. >> >> Christian. > > Why not just always ensure the guilty job's ring is always checked > first and then do the rest of the rings - inside > amdgpu_device_pre_asic_reset. Seems to me like a much smaller change > with less impact to current code structure. 
Well we want to make sure that nothing else is touching the hardware any more before we start with the reset procedure. To be more precise force completing one ring could make some other scheduler think that it could go ahead and scheduler another job (because for example some hw resource (VMID?) now became available). Not sure if that is currently a problem, but I still think the GPU reset code is still rather badly designed and we should rather make larger code changes than smaller. Christian. > > Andrey > > >>> Andrey >>> >>>>> Andrey >>>>> >>>>> >>>>>>> /* after all hw jobs are reset, hw fence is >>>>>>> meaningless, so >>>>>>> force_completion */ >>>>>>> amdgpu_fence_driver_force_completion(ring); >>>>>>> } >>>>>>> @@ -3358,7 +3371,8 @@ static int amdgpu_device_pre_asic_reset(struct >>>>>>> amdgpu_device *adev, >>>>>>> - if (!amdgpu_sriov_vf(adev)) { >>>>>>> + /* Don't suspend on bare metal if we are not going to HW reset >>>>>>> the ASIC */ >>>>>>> + if (!amdgpu_sriov_vf(adev) && !(*job_signaled)) { >>>>>>> if (!need_full_reset) >>>>>>> need_full_reset = >>>>>>> amdgpu_device_ip_need_full_reset(adev); >>>>>>> @@ -3495,7 +3509,7 @@ static int amdgpu_do_asic_reset(struct >>>>>>> amdgpu_hive_info *hive, >>>>>>> return r; >>>>>>> } >>>>>>> -static void amdgpu_device_post_asic_reset(struct amdgpu_device >>>>>>> *adev) >>>>>>> +static void amdgpu_device_post_asic_reset(struct amdgpu_device >>>>>>> *adev, bool job_signaled) >>>>>>> { >>>>>>> int i; >>>>>>> @@ -3505,7 +3519,8 @@ static void >>>>>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev) >>>>>>> if (!ring || !ring->sched.thread) >>>>>>> continue; >>>>>>> - if (!adev->asic_reset_res) >>>>>>> + /* No point to resubmit jobs if we didn't HW reset*/ >>>>>>> + if (!adev->asic_reset_res && !job_signaled) >>>>>>> drm_sched_resubmit_jobs(&ring->sched); >>>>>>> drm_sched_start(&ring->sched, !adev->asic_reset_res); >>>>>>> @@ -3518,14 +3533,21 @@ static void >>>>>>> amdgpu_device_post_asic_reset(struct 
amdgpu_device *adev) >>>>>>> adev->asic_reset_res = 0; >>>>>>> } >>>>>>> -static void amdgpu_device_lock_adev(struct amdgpu_device *adev) >>>>>>> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, >>>>>>> bool >>>>>>> trylock) >>>>>>> { >>>>>>> - mutex_lock(&adev->lock_reset); >>>>>>> + if (trylock) { >>>>>>> + if (!mutex_trylock(&adev->lock_reset)) >>>>>>> + return false; >>>>>>> + } else >>>>>>> + mutex_lock(&adev->lock_reset); >>>>>>> + >>>>>>> atomic_inc(&adev->gpu_reset_counter); >>>>>>> adev->in_gpu_reset = 1; >>>>>>> /* Block kfd: SRIOV would do it separately */ >>>>>>> if (!amdgpu_sriov_vf(adev)) >>>>>>> amdgpu_amdkfd_pre_reset(adev); >>>>>>> + >>>>>>> + return true; >>>>>>> } >>>>>>> static void amdgpu_device_unlock_adev(struct amdgpu_device >>>>>>> *adev) >>>>>>> @@ -3555,29 +3577,44 @@ int amdgpu_device_gpu_recover(struct >>>>>>> amdgpu_device *adev, >>>>>>> { >>>>>>> int r; >>>>>>> struct amdgpu_hive_info *hive = NULL; >>>>>>> - bool need_full_reset = false; >>>>>>> struct amdgpu_device *tmp_adev = NULL; >>>>>>> struct list_head device_list, *device_list_handle = NULL; >>>>>>> + bool xgmi_topology_present, need_full_reset, job_signaled; >>>>>>> + need_full_reset = job_signaled = false; >>>>>>> INIT_LIST_HEAD(&device_list); >>>>>>> dev_info(adev->dev, "GPU reset begin!\n"); >>>>>>> + hive = amdgpu_get_xgmi_hive(adev, 0); >>>>>>> + xgmi_topology_present = hive && >>>>>>> adev->gmc.xgmi.num_physical_nodes > 1; >>>>>>> + >>>>>>> /* >>>>>>> - * In case of XGMI hive disallow concurrent resets to be >>>>>>> triggered >>>>>>> - * by different nodes. No point also since the one node already >>>>>>> executing >>>>>>> - * reset will also reset all the other nodes in the hive. >>>>>>> + * Here we trylock to avoid chain of resets executing from >>>>>>> + * either trigger by jobs on different adevs in XGMI hive or >>>>>>> jobs on >>>>>>> + * different schedulers for same device while this tdr is >>>>>>> running. 
>>>>>>> + * We always reset all schedulers for device and all devices >>>>>>> for >>>>>>> XGMI >>>>>>> + * hive so that should take care of them too. >>>>>>> */ >>>>>>> - hive = amdgpu_get_xgmi_hive(adev, 0); >>>>>>> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && >>>>>>> - !mutex_trylock(&hive->reset_lock)) >>>>>>> + >>>>>>> + if (xgmi_topology_present && >>>>>>> !mutex_trylock(&hive->reset_lock)) { >>>>>>> + DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as >>>>>>> another already in progress", >>>>>>> + job->base.id, hive->hive_id); >>>>>>> return 0; >>>>>>> + } >>>>>>> /* Start with adev pre asic reset first for soft reset >>>>>>> check.*/ >>>>>>> - amdgpu_device_lock_adev(adev); >>>>>>> + if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) { >>>>>>> + DRM_INFO("Bailing on TDR for s_job:%llx, as another already >>>>>>> in progress", >>>>>>> + job->base.id); >>>>>>> + return 0; >>>>>>> + } >>>>>>> + >>>>>>> + /* Guilty job will be freed after this*/ >>>>>>> r = amdgpu_device_pre_asic_reset(adev, >>>>>>> job, >>>>>>> - &need_full_reset); >>>>>>> + &need_full_reset, >>>>>>> + &job_signaled); >>>>>>> if (r) { >>>>>>> /*TODO Should we stop ?*/ >>>>>>> DRM_ERROR("GPU pre asic reset failed with err, %d for >>>>>>> drm >>>>>>> dev, %s ", >>>>>>> @@ -3609,10 +3646,11 @@ int amdgpu_device_gpu_recover(struct >>>>>>> amdgpu_device *adev, >>>>>>> if (tmp_adev == adev) >>>>>>> continue; >>>>>>> - amdgpu_device_lock_adev(tmp_adev); >>>>>>> + amdgpu_device_lock_adev(tmp_adev, false); >>>>>>> r = amdgpu_device_pre_asic_reset(tmp_adev, >>>>>>> NULL, >>>>>>> - &need_full_reset); >>>>>>> + &need_full_reset, >>>>>>> + &job_signaled); >>>>>>> /*TODO Should we stop ?*/ >>>>>>> if (r) { >>>>>>> DRM_ERROR("GPU pre asic reset failed with err, %d >>>>>>> for >>>>>>> drm dev, %s ", >>>>>>> @@ -3623,19 +3661,21 @@ int amdgpu_device_gpu_recover(struct >>>>>>> amdgpu_device *adev, >>>>>>> /* Actual ASIC resets if needed.*/ >>>>>>> /* TODO Implement XGMI hive reset logic 
for SRIOV */ >>>>>>> - if (amdgpu_sriov_vf(adev)) { >>>>>>> - r = amdgpu_device_reset_sriov(adev, job ? false : true); >>>>>>> - if (r) >>>>>>> - adev->asic_reset_res = r; >>>>>>> - } else { >>>>>>> - r = amdgpu_do_asic_reset(hive, device_list_handle, >>>>>>> &need_full_reset); >>>>>>> - if (r && r == -EAGAIN) >>>>>>> - goto retry; >>>>>>> + if (!job || !job_signaled) { >>>>>>> + if (amdgpu_sriov_vf(adev)) { >>>>>>> + r = amdgpu_device_reset_sriov(adev, job ? false : >>>>>>> true); >>>>>>> + if (r) >>>>>>> + adev->asic_reset_res = r; >>>>>>> + } else { >>>>>>> + r = amdgpu_do_asic_reset(hive, device_list_handle, >>>>>>> &need_full_reset); >>>>>>> + if (r && r == -EAGAIN) >>>>>>> + goto retry; >>>>>>> + } >>>>>>> } >>>>>>> /* Post ASIC reset for all devs .*/ >>>>>>> list_for_each_entry(tmp_adev, device_list_handle, >>>>>>> gmc.xgmi.head) { >>>>>>> - amdgpu_device_post_asic_reset(tmp_adev); >>>>>>> + amdgpu_device_post_asic_reset(tmp_adev, job_signaled); >>>>>>> if (r) { >>>>>>> /* bad news, how to tell it to userspace ? */ >>>>>>> @@ -3648,7 +3688,7 @@ int amdgpu_device_gpu_recover(struct >>>>>>> amdgpu_device *adev, >>>>>>> amdgpu_device_unlock_adev(tmp_adev); >>>>>>> } >>>>>>> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1) >>>>>>> + if (xgmi_topology_present) >>>>>>> mutex_unlock(&hive->reset_lock); >>>>>>> if (r) >>>>> _______________________________________________ >>>>> amd-gfx mailing list >>>>> amd-gfx@lists.freedesktop.org >>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx >>> _______________________________________________ >>> amd-gfx mailing list >>> amd-gfx@lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index aabd043..4446892 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3327,10 +3327,12 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_job *job, - bool *need_full_reset_arg) + bool *need_full_reset_arg, + bool *job_signaled) { int i, r = 0; bool need_full_reset = *need_full_reset_arg; + struct amdgpu_ring *job_ring = job ? to_amdgpu_ring(job->base.sched) : NULL; /* block all schedulers and reset given job's ring */ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -3341,6 +3343,17 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, drm_sched_stop(&ring->sched, &job->base); + /* + * Must check guilty signal here since after this point all old + * HW fences are force signaled. + * + * job->base holds a reference to parent fence + */ + if (job_signaled && job && ring == job_ring && + job->base.s_fence->parent && + dma_fence_is_signaled(job->base.s_fence->parent)) + *job_signaled = true; + /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); } @@ -3358,7 +3371,8 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, - if (!amdgpu_sriov_vf(adev)) { + /* Don't suspend on bare metal if we are not going to HW reset the ASIC */ + if (!amdgpu_sriov_vf(adev) && !(*job_signaled)) { if (!need_full_reset) need_full_reset = amdgpu_device_ip_need_full_reset(adev); @@ -3495,7 +3509,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, return r; } -static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev) +static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev, bool job_signaled) { int i; @@ -3505,7 +3519,8 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev) if 
(!ring || !ring->sched.thread) continue; - if (!adev->asic_reset_res) + /* No point to resubmit jobs if we didn't HW reset*/ + if (!adev->asic_reset_res && !job_signaled) drm_sched_resubmit_jobs(&ring->sched); drm_sched_start(&ring->sched, !adev->asic_reset_res); @@ -3518,14 +3533,21 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev) adev->asic_reset_res = 0; } -static void amdgpu_device_lock_adev(struct amdgpu_device *adev) +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) { - mutex_lock(&adev->lock_reset); + if (trylock) { + if (!mutex_trylock(&adev->lock_reset)) + return false; + } else + mutex_lock(&adev->lock_reset); + atomic_inc(&adev->gpu_reset_counter); adev->in_gpu_reset = 1; /* Block kfd: SRIOV would do it separately */ if (!amdgpu_sriov_vf(adev)) amdgpu_amdkfd_pre_reset(adev); + + return true; } static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) @@ -3555,29 +3577,44 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, { int r; struct amdgpu_hive_info *hive = NULL; - bool need_full_reset = false; struct amdgpu_device *tmp_adev = NULL; struct list_head device_list, *device_list_handle = NULL; + bool xgmi_topology_present, need_full_reset, job_signaled; + need_full_reset = job_signaled = false; INIT_LIST_HEAD(&device_list); dev_info(adev->dev, "GPU reset begin!\n"); + hive = amdgpu_get_xgmi_hive(adev, 0); + xgmi_topology_present = hive && adev->gmc.xgmi.num_physical_nodes > 1; + /* - * In case of XGMI hive disallow concurrent resets to be triggered - * by different nodes. No point also since the one node already executing - * reset will also reset all the other nodes in the hive. + * Here we trylock to avoid chain of resets executing from + * either trigger by jobs on different adevs in XGMI hive or jobs on + * different schedulers for same device while this tdr is running. 
+ * We always reset all schedulers for device and all devices for XGMI + * hive so that should take care of them too. */ - hive = amdgpu_get_xgmi_hive(adev, 0); - if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && - !mutex_trylock(&hive->reset_lock)) + + if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) { + DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", + job->base.id, hive->hive_id); return 0; + } /* Start with adev pre asic reset first for soft reset check.*/ - amdgpu_device_lock_adev(adev); + if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) { + DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress", + job->base.id); + return 0; + } + + /* Guilty job will be freed after this*/ r = amdgpu_device_pre_asic_reset(adev, job, - &need_full_reset); + &need_full_reset, + &job_signaled); if (r) { /*TODO Should we stop ?*/ DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", @@ -3609,10 +3646,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (tmp_adev == adev) continue; - amdgpu_device_lock_adev(tmp_adev); + amdgpu_device_lock_adev(tmp_adev, false); r = amdgpu_device_pre_asic_reset(tmp_adev, NULL, - &need_full_reset); + &need_full_reset, + &job_signaled); /*TODO Should we stop ?*/ if (r) { DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", @@ -3623,19 +3661,21 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* Actual ASIC resets if needed.*/ /* TODO Implement XGMI hive reset logic for SRIOV */ - if (amdgpu_sriov_vf(adev)) { - r = amdgpu_device_reset_sriov(adev, job ? false : true); - if (r) - adev->asic_reset_res = r; - } else { - r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset); - if (r && r == -EAGAIN) - goto retry; + if (!job || !job_signaled) { + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_device_reset_sriov(adev, job ? 
false : true); + if (r) + adev->asic_reset_res = r; + } else { + r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset); + if (r && r == -EAGAIN) + goto retry; + } } /* Post ASIC reset for all devs .*/ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - amdgpu_device_post_asic_reset(tmp_adev); + amdgpu_device_post_asic_reset(tmp_adev, job_signaled); if (r) { /* bad news, how to tell it to userspace ? */ @@ -3648,7 +3688,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_device_unlock_adev(tmp_adev); } - if (hive && adev->gmc.xgmi.num_physical_nodes > 1) + if (xgmi_topology_present) mutex_unlock(&hive->reset_lock); if (r)
Also reject TDRs if another one is already running. Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 94 +++++++++++++++++++++--------- 1 file changed, 67 insertions(+), 27 deletions(-)