Message ID | 20220303230131.2103-10-shameerali.kolothum.thodi@huawei.com (mailing list archive) |
---|---|
State | Superseded |
Series | vfio/hisilicon: add ACC live migration driver |
On Thu, Mar 03, 2022 at 11:01:31PM +0000, Shameer Kolothum wrote:
> Register private handler for pci_error_handlers.reset_done and update
> state accordingly.
>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
>  .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 57 ++++++++++++++++++-
>  .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h |  4 +-
>  2 files changed, 57 insertions(+), 4 deletions(-)

It looks OK to me

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Jason
On 2022/3/4 7:01, Shameer Kolothum wrote:
> Register private handler for pci_error_handlers.reset_done and update
> state accordingly.
>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
>  .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 57 ++++++++++++++++++-
>  .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h |  4 +-
>  2 files changed, 57 insertions(+), 4 deletions(-)
>
Reviewed-by: Longfang Liu <liulongfang@huawei.com>

Thanks,
Longfang
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index aa2e4b6bf598..53e4c5cb3a71 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -625,6 +625,27 @@ static void hisi_acc_vf_disable_fds(struct hisi_acc_vf_core_device *hisi_acc_vde
 	}
 }
 
+/*
+ * This function is called in all state_mutex unlock cases to
+ * handle a 'deferred_reset' if exists.
+ */
+static void
+hisi_acc_vf_state_mutex_unlock(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+again:
+	spin_lock(&hisi_acc_vdev->reset_lock);
+	if (hisi_acc_vdev->deferred_reset) {
+		hisi_acc_vdev->deferred_reset = false;
+		spin_unlock(&hisi_acc_vdev->reset_lock);
+		hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
+		hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+		hisi_acc_vf_disable_fds(hisi_acc_vdev);
+		goto again;
+	}
+	mutex_unlock(&hisi_acc_vdev->state_mutex);
+	spin_unlock(&hisi_acc_vdev->reset_lock);
+}
+
 static void hisi_acc_vf_start_device(struct hisi_acc_vf_core_device *hisi_acc_vdev)
 {
 	struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
@@ -921,7 +942,7 @@ hisi_acc_vfio_pci_set_device_state(struct vfio_device *vdev,
 			break;
 		}
 	}
-	mutex_unlock(&hisi_acc_vdev->state_mutex);
+	hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
 	return res;
 }
 
@@ -934,10 +955,35 @@ hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
 
 	mutex_lock(&hisi_acc_vdev->state_mutex);
 	*curr_state = hisi_acc_vdev->mig_state;
-	mutex_unlock(&hisi_acc_vdev->state_mutex);
+	hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
 	return 0;
 }
 
+static void hisi_acc_vf_pci_aer_reset_done(struct pci_dev *pdev)
+{
+	struct hisi_acc_vf_core_device *hisi_acc_vdev = dev_get_drvdata(&pdev->dev);
+
+	if (hisi_acc_vdev->core_device.vdev.migration_flags !=
+	    VFIO_MIGRATION_STOP_COPY)
+		return;
+
+	/*
+	 * As the higher VFIO layers are holding locks across reset and using
+	 * those same locks with the mm_lock we need to prevent ABBA deadlock
+	 * with the state_mutex and mm_lock.
+	 * In case the state_mutex was taken already we defer the cleanup work
+	 * to the unlock flow of the other running context.
+	 */
+	spin_lock(&hisi_acc_vdev->reset_lock);
+	hisi_acc_vdev->deferred_reset = true;
+	if (!mutex_trylock(&hisi_acc_vdev->state_mutex)) {
+		spin_unlock(&hisi_acc_vdev->reset_lock);
+		return;
+	}
+	spin_unlock(&hisi_acc_vdev->reset_lock);
+	hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
+}
+
 static int hisi_acc_vf_qm_init(struct hisi_acc_vf_core_device *hisi_acc_vdev)
 {
 	struct vfio_pci_core_device *vdev = &hisi_acc_vdev->core_device;
@@ -1252,12 +1298,17 @@ static const struct pci_device_id hisi_acc_vfio_pci_table[] = {
 
 MODULE_DEVICE_TABLE(pci, hisi_acc_vfio_pci_table);
 
+static const struct pci_error_handlers hisi_acc_vf_err_handlers = {
+	.reset_done = hisi_acc_vf_pci_aer_reset_done,
+	.error_detected = vfio_pci_core_aer_err_detected,
+};
+
 static struct pci_driver hisi_acc_vfio_pci_driver = {
 	.name = KBUILD_MODNAME,
 	.id_table = hisi_acc_vfio_pci_table,
 	.probe = hisi_acc_vfio_pci_probe,
 	.remove = hisi_acc_vfio_pci_remove,
-	.err_handler = &vfio_pci_core_err_handlers,
+	.err_handler = &hisi_acc_vf_err_handlers,
 };
 
 module_pci_driver(hisi_acc_vfio_pci_driver);
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
index 1c7d75408790..5494f4983bbe 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
@@ -98,6 +98,7 @@ struct hisi_acc_vf_migration_file {
 
 struct hisi_acc_vf_core_device {
 	struct vfio_pci_core_device core_device;
+	u8 deferred_reset:1;
 	/* for migration state */
 	struct mutex state_mutex;
 	enum vfio_device_mig_state mig_state;
@@ -107,7 +108,8 @@ struct hisi_acc_vf_core_device {
 	struct hisi_qm vf_qm;
 	u32 vf_qm_state;
 	int vf_id;
-
+	/* for reset handler */
+	spinlock_t reset_lock;
 	struct hisi_acc_vf_migration_file *resuming_migf;
 	struct hisi_acc_vf_migration_file *saving_migf;
 };
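To make the locking scheme above easier to follow in isolation, here is a minimal userspace sketch of the same deferred-reset pattern using pthreads. The toy_* names and the pthread primitives are illustrative stand-ins, not driver code; only the locking shape mirrors the patch: reset_done() records the reset under the spinlock and tries the state mutex, and if another context already holds the mutex, that context replays the cleanup when it unlocks through the shared helper.

/*
 * Standalone sketch of the deferred-reset pattern (illustration only).
 * Build with: cc -pthread toy.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_vdev {
	pthread_mutex_t state_mutex;	/* serializes migration state changes */
	pthread_spinlock_t reset_lock;	/* protects deferred_reset */
	bool deferred_reset;
	int mig_state;
};

/* Stand-in for the real cleanup (disable fds, mark the QM not ready, ...). */
static void toy_reset_cleanup(struct toy_vdev *d)
{
	d->mig_state = 0;
	printf("deferred reset handled, state back to RUNNING\n");
}

/* Every state_mutex unlock goes through here, as in the patch. */
static void toy_state_mutex_unlock(struct toy_vdev *d)
{
again:
	pthread_spin_lock(&d->reset_lock);
	if (d->deferred_reset) {
		d->deferred_reset = false;
		pthread_spin_unlock(&d->reset_lock);
		toy_reset_cleanup(d);
		goto again;
	}
	pthread_mutex_unlock(&d->state_mutex);
	pthread_spin_unlock(&d->reset_lock);
}

/* Analogue of hisi_acc_vf_pci_aer_reset_done(). */
static void toy_reset_done(struct toy_vdev *d)
{
	pthread_spin_lock(&d->reset_lock);
	d->deferred_reset = true;
	if (pthread_mutex_trylock(&d->state_mutex)) {
		/* Lock is busy: the current holder cleans up when it unlocks. */
		pthread_spin_unlock(&d->reset_lock);
		return;
	}
	pthread_spin_unlock(&d->reset_lock);
	toy_state_mutex_unlock(d);
}

int main(void)
{
	struct toy_vdev d = { .mig_state = 1 };

	pthread_mutex_init(&d.state_mutex, NULL);
	pthread_spin_init(&d.reset_lock, PTHREAD_PROCESS_PRIVATE);

	toy_reset_done(&d);	/* uncontended case: cleanup runs right away */
	return 0;
}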
Register private handler for pci_error_handlers.reset_done and update
state accordingly.

Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
 .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 57 ++++++++++++++++++-
 .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h |  4 +-
 2 files changed, 57 insertions(+), 4 deletions(-)
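A side note on the design for anyone extending this driver later: because reset_done() may record a deferred reset while state_mutex is held, every path that takes state_mutex has to release it through hisi_acc_vf_state_mutex_unlock() rather than mutex_unlock(), otherwise a pending reset could be lost. A hypothetical accessor would therefore look like the following; hisi_acc_vf_example_query() is made up for illustration and is not part of the patch:

/*
 * Hypothetical example only -- not part of the patch. It shows the rule
 * the patch establishes: take state_mutex normally, but always drop it
 * via hisi_acc_vf_state_mutex_unlock() so a reset recorded by
 * hisi_acc_vf_pci_aer_reset_done() while the lock was held gets replayed.
 */
static int hisi_acc_vf_example_query(struct hisi_acc_vf_core_device *hisi_acc_vdev,
				     u32 *qm_state)
{
	mutex_lock(&hisi_acc_vdev->state_mutex);
	*qm_state = hisi_acc_vdev->vf_qm_state;
	hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);	/* not mutex_unlock() */
	return 0;
}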