Message ID | 20210510163625.407105-6-andrey.grodzovsky@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | RFC Support hot device unplug in amdgpu | expand |
Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky: > Use it to call disply code dependent on device->drv_data > before it's set to NULL on device unplug > > v5: Move HW finilization into this callback to prevent MMIO accesses > post cpi remove. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Acked-by: Christian König <christian.koenig@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 59 +++++++++++++------ > .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++- > drivers/gpu/drm/amd/include/amd_shared.h | 2 + > 3 files changed, 52 insertions(+), 21 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 3760ce7d8ff8..18598eda18f6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -2558,34 +2558,26 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) > return 0; > } > > -/** > - * amdgpu_device_ip_fini - run fini for hardware IPs > - * > - * @adev: amdgpu_device pointer > - * > - * Main teardown pass for hardware IPs. The list of all the hardware > - * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks > - * are run. hw_fini tears down the hardware associated with each IP > - * and sw_fini tears down any software state associated with each IP. > - * Returns 0 on success, negative error code on failure. > - */ > -static int amdgpu_device_ip_fini(struct amdgpu_device *adev) > +static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) > { > int i, r; > > - if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) > - amdgpu_virt_release_ras_err_handler_data(adev); > + for (i = 0; i < adev->num_ip_blocks; i++) { > + if (!adev->ip_blocks[i].version->funcs->early_fini) > + continue; > > - amdgpu_ras_pre_fini(adev); > + r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev); > + if (r) { > + DRM_DEBUG("early_fini of IP block <%s> failed %d\n", > + adev->ip_blocks[i].version->funcs->name, r); > + } > + } > > - if (adev->gmc.xgmi.num_physical_nodes > 1) > - amdgpu_xgmi_remove_device(adev); > + amdgpu_amdkfd_suspend(adev, false); > > amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); > amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); > > - amdgpu_amdkfd_device_fini(adev); > - > /* need to disable SMC first */ > for (i = 0; i < adev->num_ip_blocks; i++) { > if (!adev->ip_blocks[i].status.hw) > @@ -2616,6 +2608,33 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) > adev->ip_blocks[i].status.hw = false; > } > > + return 0; > +} > + > +/** > + * amdgpu_device_ip_fini - run fini for hardware IPs > + * > + * @adev: amdgpu_device pointer > + * > + * Main teardown pass for hardware IPs. The list of all the hardware > + * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks > + * are run. hw_fini tears down the hardware associated with each IP > + * and sw_fini tears down any software state associated with each IP. > + * Returns 0 on success, negative error code on failure. > + */ > +static int amdgpu_device_ip_fini(struct amdgpu_device *adev) > +{ > + int i, r; > + > + if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) > + amdgpu_virt_release_ras_err_handler_data(adev); > + > + amdgpu_ras_pre_fini(adev); > + > + if (adev->gmc.xgmi.num_physical_nodes > 1) > + amdgpu_xgmi_remove_device(adev); > + > + amdgpu_amdkfd_device_fini_sw(adev); > > for (i = adev->num_ip_blocks - 1; i >= 0; i--) { > if (!adev->ip_blocks[i].status.sw) > @@ -3683,6 +3702,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) > amdgpu_fbdev_fini(adev); > > amdgpu_irq_fini_hw(adev); > + > + amdgpu_device_ip_fini_early(adev); > } > > void amdgpu_device_fini_sw(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > index 296704ce3768..6c2c6a51ce6c 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > @@ -1251,6 +1251,15 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) > return -EINVAL; > } > > +static int amdgpu_dm_early_fini(void *handle) > +{ > + struct amdgpu_device *adev = (struct amdgpu_device *)handle; > + > + amdgpu_dm_audio_fini(adev); > + > + return 0; > +} > + > static void amdgpu_dm_fini(struct amdgpu_device *adev) > { > int i; > @@ -1259,8 +1268,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) > drm_encoder_cleanup(&adev->dm.mst_encoders[i].base); > } > > - amdgpu_dm_audio_fini(adev); > - > amdgpu_dm_destroy_drm_device(&adev->dm); > > #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) > @@ -2298,6 +2305,7 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = { > .late_init = dm_late_init, > .sw_init = dm_sw_init, > .sw_fini = dm_sw_fini, > + .early_fini = amdgpu_dm_early_fini, > .hw_init = dm_hw_init, > .hw_fini = dm_hw_fini, > .suspend = dm_suspend, > diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h > index 43ed6291b2b8..1ad56da486e4 100644 > --- a/drivers/gpu/drm/amd/include/amd_shared.h > +++ b/drivers/gpu/drm/amd/include/amd_shared.h > @@ -240,6 +240,7 @@ enum amd_dpm_forced_level; > * @late_init: sets up late driver/hw state (post hw_init) - Optional > * @sw_init: sets up driver state, does not configure hw > * @sw_fini: tears down driver state, does not configure hw > + * @early_fini: tears down stuff before dev detached from driver > * @hw_init: sets up the hw state > * @hw_fini: tears down the hw state > * @late_fini: final cleanup > @@ -268,6 +269,7 @@ struct amd_ip_funcs { > int (*late_init)(void *handle); > int (*sw_init)(void *handle); > int (*sw_fini)(void *handle); > + int (*early_fini)(void *handle); > int (*hw_init)(void *handle); > int (*hw_fini)(void *handle); > void (*late_fini)(void *handle);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3760ce7d8ff8..18598eda18f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2558,34 +2558,26 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) return 0; } -/** - * amdgpu_device_ip_fini - run fini for hardware IPs - * - * @adev: amdgpu_device pointer - * - * Main teardown pass for hardware IPs. The list of all the hardware - * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks - * are run. hw_fini tears down the hardware associated with each IP - * and sw_fini tears down any software state associated with each IP. - * Returns 0 on success, negative error code on failure. - */ -static int amdgpu_device_ip_fini(struct amdgpu_device *adev) +static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) { int i, r; - if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) - amdgpu_virt_release_ras_err_handler_data(adev); + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].version->funcs->early_fini) + continue; - amdgpu_ras_pre_fini(adev); + r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev); + if (r) { + DRM_DEBUG("early_fini of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + } + } - if (adev->gmc.xgmi.num_physical_nodes > 1) - amdgpu_xgmi_remove_device(adev); + amdgpu_amdkfd_suspend(adev, false); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); - amdgpu_amdkfd_device_fini(adev); - /* need to disable SMC first */ for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.hw) @@ -2616,6 +2608,33 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } + return 0; +} + +/** + * amdgpu_device_ip_fini - run fini for hardware IPs + * + * @adev: amdgpu_device pointer + * + * Main teardown pass for hardware IPs. The list of all the hardware + * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks + * are run. hw_fini tears down the hardware associated with each IP + * and sw_fini tears down any software state associated with each IP. + * Returns 0 on success, negative error code on failure. + */ +static int amdgpu_device_ip_fini(struct amdgpu_device *adev) +{ + int i, r; + + if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) + amdgpu_virt_release_ras_err_handler_data(adev); + + amdgpu_ras_pre_fini(adev); + + if (adev->gmc.xgmi.num_physical_nodes > 1) + amdgpu_xgmi_remove_device(adev); + + amdgpu_amdkfd_device_fini_sw(adev); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.sw) @@ -3683,6 +3702,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_fbdev_fini(adev); amdgpu_irq_fini_hw(adev); + + amdgpu_device_ip_fini_early(adev); } void amdgpu_device_fini_sw(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 296704ce3768..6c2c6a51ce6c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1251,6 +1251,15 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) return -EINVAL; } +static int amdgpu_dm_early_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_dm_audio_fini(adev); + + return 0; +} + static void amdgpu_dm_fini(struct amdgpu_device *adev) { int i; @@ -1259,8 +1268,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) drm_encoder_cleanup(&adev->dm.mst_encoders[i].base); } - amdgpu_dm_audio_fini(adev); - amdgpu_dm_destroy_drm_device(&adev->dm); #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) @@ -2298,6 +2305,7 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = { .late_init = dm_late_init, .sw_init = dm_sw_init, .sw_fini = dm_sw_fini, + .early_fini = amdgpu_dm_early_fini, .hw_init = dm_hw_init, .hw_fini = dm_hw_fini, .suspend = dm_suspend, diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 43ed6291b2b8..1ad56da486e4 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -240,6 +240,7 @@ enum amd_dpm_forced_level; * @late_init: sets up late driver/hw state (post hw_init) - Optional * @sw_init: sets up driver state, does not configure hw * @sw_fini: tears down driver state, does not configure hw + * @early_fini: tears down stuff before dev detached from driver * @hw_init: sets up the hw state * @hw_fini: tears down the hw state * @late_fini: final cleanup @@ -268,6 +269,7 @@ struct amd_ip_funcs { int (*late_init)(void *handle); int (*sw_init)(void *handle); int (*sw_fini)(void *handle); + int (*early_fini)(void *handle); int (*hw_init)(void *handle); int (*hw_fini)(void *handle); void (*late_fini)(void *handle);
Use it to call disply code dependent on device->drv_data before it's set to NULL on device unplug v5: Move HW finilization into this callback to prevent MMIO accesses post cpi remove. Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 59 +++++++++++++------ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++- drivers/gpu/drm/amd/include/amd_shared.h | 2 + 3 files changed, 52 insertions(+), 21 deletions(-)