Message ID | 20230322091900.1982453-3-stanislaw.gruszka@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | acell/ivpu: Fixes for 6.3 | expand |
On 3/22/2023 3:18 AM, Stanislaw Gruszka wrote: > Prevent running recovery_work after device is removed. > > Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com> > --- > drivers/accel/ivpu/ivpu_drv.c | 2 ++ > drivers/accel/ivpu/ivpu_pm.c | 15 +++++++++++++-- > drivers/accel/ivpu/ivpu_pm.h | 1 + > 3 files changed, 16 insertions(+), 2 deletions(-) > > diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c > index ac06bbfca920..d9e311b40348 100644 > --- a/drivers/accel/ivpu/ivpu_drv.c > +++ b/drivers/accel/ivpu/ivpu_drv.c > @@ -580,6 +580,8 @@ static void ivpu_dev_fini(struct ivpu_device *vdev) > ivpu_pm_disable(vdev); > ivpu_shutdown(vdev); > ivpu_job_done_thread_fini(vdev); > + ivpu_pm_cancel_recovery(vdev); > + > ivpu_ipc_fini(vdev); > ivpu_fw_fini(vdev); > ivpu_mmu_global_context_fini(vdev); > diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c > index a880f1dd857e..df2e98cc0a56 100644 > --- a/drivers/accel/ivpu/ivpu_pm.c > +++ b/drivers/accel/ivpu/ivpu_pm.c > @@ -98,11 +98,17 @@ static int ivpu_resume(struct ivpu_device *vdev) > static void ivpu_pm_recovery_work(struct work_struct *work) > { > struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); > - struct ivpu_device *vdev = pm->vdev; > + struct ivpu_device *vdev = pm->vdev; > char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; > int ret; > > - ret = pci_reset_function(to_pci_dev(vdev->drm.dev)); > +retry: > + ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); > + if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) { > + cond_resched(); > + goto retry; > + } > + > if (ret) > ivpu_err(vdev, "Failed to reset VPU: %d\n", ret); I'm unsure about this now. Yes, you did fail to reset the VPU, but is it an error if the device is unplugged? Feels like this message could be misleading in that corner case. > @@ -302,6 +308,11 @@ int ivpu_pm_init(struct ivpu_device *vdev) > return 0; > } > > +void ivpu_pm_cancel_recovery(struct ivpu_device *vdev) > +{ > + cancel_work_sync(&vdev->pm->recovery_work); > +} > + > void ivpu_pm_enable(struct ivpu_device *vdev) > { > struct device *dev = vdev->drm.dev; > diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h > index dc1b3758e13f..baca98187255 100644 > --- a/drivers/accel/ivpu/ivpu_pm.h > +++ b/drivers/accel/ivpu/ivpu_pm.h > @@ -21,6 +21,7 @@ struct ivpu_pm_info { > int ivpu_pm_init(struct ivpu_device *vdev); > void ivpu_pm_enable(struct ivpu_device *vdev); > void ivpu_pm_disable(struct ivpu_device *vdev); > +void ivpu_pm_cancel_recovery(struct ivpu_device *vdev); > > int ivpu_pm_suspend_cb(struct device *dev); > int ivpu_pm_resume_cb(struct device *dev);
On Wed, Mar 22, 2023 at 08:21:03AM -0600, Jeffrey Hugo wrote: > On 3/22/2023 3:18 AM, Stanislaw Gruszka wrote: > > Prevent running recovery_work after device is removed. > > > > Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com> > > --- > > drivers/accel/ivpu/ivpu_drv.c | 2 ++ > > drivers/accel/ivpu/ivpu_pm.c | 15 +++++++++++++-- > > drivers/accel/ivpu/ivpu_pm.h | 1 + > > 3 files changed, 16 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c > > index ac06bbfca920..d9e311b40348 100644 > > --- a/drivers/accel/ivpu/ivpu_drv.c > > +++ b/drivers/accel/ivpu/ivpu_drv.c > > @@ -580,6 +580,8 @@ static void ivpu_dev_fini(struct ivpu_device *vdev) > > ivpu_pm_disable(vdev); > > ivpu_shutdown(vdev); > > ivpu_job_done_thread_fini(vdev); > > + ivpu_pm_cancel_recovery(vdev); > > + > > ivpu_ipc_fini(vdev); > > ivpu_fw_fini(vdev); > > ivpu_mmu_global_context_fini(vdev); > > diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c > > index a880f1dd857e..df2e98cc0a56 100644 > > --- a/drivers/accel/ivpu/ivpu_pm.c > > +++ b/drivers/accel/ivpu/ivpu_pm.c > > @@ -98,11 +98,17 @@ static int ivpu_resume(struct ivpu_device *vdev) > > static void ivpu_pm_recovery_work(struct work_struct *work) > > { > > struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); > > - struct ivpu_device *vdev = pm->vdev; > > + struct ivpu_device *vdev = pm->vdev; > > char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; > > int ret; > > - ret = pci_reset_function(to_pci_dev(vdev->drm.dev)); > > +retry: > > + ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); > > + if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) { > > + cond_resched(); > > + goto retry; > > + } > > + > > if (ret) > > ivpu_err(vdev, "Failed to reset VPU: %d\n", ret); > > I'm unsure about this now. Yes, you did fail to reset the VPU, but is it an > error if the device is unplugged? Feels like this message could be > misleading in that corner case. Yes, it's not elegant to print error message on unplug. I'll change this to: if (ret && ret != -EAGAIN) ivpu_err(vdev, "Failed to reset VPU: %d\n", ret); Regards Stanislaw
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index ac06bbfca920..d9e311b40348 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -580,6 +580,8 @@ static void ivpu_dev_fini(struct ivpu_device *vdev) ivpu_pm_disable(vdev); ivpu_shutdown(vdev); ivpu_job_done_thread_fini(vdev); + ivpu_pm_cancel_recovery(vdev); + ivpu_ipc_fini(vdev); ivpu_fw_fini(vdev); ivpu_mmu_global_context_fini(vdev); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index a880f1dd857e..df2e98cc0a56 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -98,11 +98,17 @@ static int ivpu_resume(struct ivpu_device *vdev) static void ivpu_pm_recovery_work(struct work_struct *work) { struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); - struct ivpu_device *vdev = pm->vdev; + struct ivpu_device *vdev = pm->vdev; char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; int ret; - ret = pci_reset_function(to_pci_dev(vdev->drm.dev)); +retry: + ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); + if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) { + cond_resched(); + goto retry; + } + if (ret) ivpu_err(vdev, "Failed to reset VPU: %d\n", ret); @@ -302,6 +308,11 @@ int ivpu_pm_init(struct ivpu_device *vdev) return 0; } +void ivpu_pm_cancel_recovery(struct ivpu_device *vdev) +{ + cancel_work_sync(&vdev->pm->recovery_work); +} + void ivpu_pm_enable(struct ivpu_device *vdev) { struct device *dev = vdev->drm.dev; diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h index dc1b3758e13f..baca98187255 100644 --- a/drivers/accel/ivpu/ivpu_pm.h +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -21,6 +21,7 @@ struct ivpu_pm_info { int ivpu_pm_init(struct ivpu_device *vdev); void ivpu_pm_enable(struct ivpu_device *vdev); void ivpu_pm_disable(struct ivpu_device *vdev); +void ivpu_pm_cancel_recovery(struct ivpu_device *vdev); int ivpu_pm_suspend_cb(struct device *dev); int ivpu_pm_resume_cb(struct device *dev);
Prevent running recovery_work after device is removed. Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com> --- drivers/accel/ivpu/ivpu_drv.c | 2 ++ drivers/accel/ivpu/ivpu_pm.c | 15 +++++++++++++-- drivers/accel/ivpu/ivpu_pm.h | 1 + 3 files changed, 16 insertions(+), 2 deletions(-)