diff mbox series

[4/6] drm/i915/gsc: Do a driver-FLR on unload if GSC was loaded

Message ID 20221121231617.1110329-5-daniele.ceraolospurio@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: Add support for GSC FW loading | expand

Commit Message

Daniele Ceraolo Spurio Nov. 21, 2022, 11:16 p.m. UTC
If the GSC was loaded, the only way to stop it during the driver unload
flow is to do a driver-FLR.
The driver-FLR is not the same as PCI config space FLR in that
it doesn't reset the SGUnit and doesn't modify the PCI config
space. Thus, it doesn't require a re-enumeration of the PCI BARs.
However, the driver-FLR does cause a memory wipe of graphics memory
on all discrete GPU platforms or a wipe limited to stolen memory
on the integrated GPU platforms.

We perform the FLR as the last action before releasing the MMIO bar, so
that we don't have to care about the consequences of the reset on the
unload flow.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c |  9 +++++
 drivers/gpu/drm/i915/i915_reg.h           |  3 ++
 drivers/gpu/drm/i915/intel_uncore.c       | 45 +++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_uncore.h       | 13 +++++++
 4 files changed, 70 insertions(+)

Comments

Daniele Ceraolo Spurio Nov. 22, 2022, 12:17 a.m. UTC | #1
On 11/21/2022 3:16 PM, Daniele Ceraolo Spurio wrote:
> If the GSC was loaded, the only way to stop it during the driver unload
> flow is to do a driver-FLR.
> The driver-FLR is not the same as PCI config space FLR in that
> it doesn't reset the SGUnit and doesn't modify the PCI config
> space. Thus, it doesn't require a re-enumeration of the PCI BARs.
> However, the driver-FLR does cause a memory wipe of graphics memory
> on all discrete GPU platforms or a wipe limited to stolen memory
> on the integrated GPU platforms.
>
> We perform the FLR as the last action before releasing the MMIO bar, so
> that we don't have to care about the consequences of the reset on the
> unload flow.
>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c |  9 +++++
>   drivers/gpu/drm/i915/i915_reg.h           |  3 ++
>   drivers/gpu/drm/i915/intel_uncore.c       | 45 +++++++++++++++++++++++
>   drivers/gpu/drm/i915/intel_uncore.h       | 13 +++++++
>   4 files changed, 70 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
> index 510fb47193ec..5dad3c19c445 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
> @@ -173,6 +173,15 @@ int intel_gsc_fw_upload(struct intel_gsc_uc *gsc)
>   	if (err)
>   		goto fail;
>   
> +	/*
> +	 * Once the GSC FW is loaded, the only way to kill it on driver unload
> +	 * is to do a driver FLR. Given this is a very disruptive action, we
> +	 * want to do it as the last action before releasing the access to the
> +	 * MMIO bar, which means we need to do it as part of the primary uncore
> +	 * cleanup.
> +	 */
> +	intel_uncore_set_flr_on_fini(&gt->i915->uncore);
> +
>   	/* FW is not fully operational until we enable SW proxy */
>   	intel_uc_fw_change_status(gsc_fw, INTEL_UC_FIRMWARE_TRANSFERRED);
>   
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8e1892d14774..60e55245200b 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -118,6 +118,9 @@
>   
>   #define GU_CNTL				_MMIO(0x101010)
>   #define   LMEM_INIT			REG_BIT(7)
> +#define   DRIVERFLR			REG_BIT(31)
> +#define GU_DEBUG			_MMIO(0x101018)
> +#define   DRIVERFLR_STATUS		REG_BIT(31)
>   
>   #define GEN6_STOLEN_RESERVED		_MMIO(0x1082C0)
>   #define GEN6_STOLEN_RESERVED_ADDR_MASK	(0xFFF << 20)
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 8006a6c61466..c1befa33ff59 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -2703,6 +2703,48 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore,
>   	}
>   }
>   
> +static void driver_flr(struct intel_uncore *uncore)
> +{
> +	struct drm_i915_private *i915 = uncore->i915;
> +	const unsigned int flr_timeout_ms = 3000; /* specs recommend a 3s wait */
> +	int ret;
> +
> +	drm_dbg(&i915->drm, "Triggering Driver-FLR\n");
> +
> +	/*
> +	 * Make sure any pending FLR requests have cleared by waiting for the
> +	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
> +	 * to make sure it's not still set from a prior attempt (it's a write to
> +	 * clear bit).
> +	 * Note that we should never be in a situation where a previous attempt
> +	 * is still pending (unless the HW is totally dead), but better to be
> +	 * safe in case something unexpected happens
> +	 */
> +	ret = intel_wait_for_register_fw(uncore, GU_CNTL, DRIVERFLR, 0, flr_timeout_ms);
> +	if (ret) {
> +		drm_err(&i915->drm,
> +			"Failed to wait for Driver-FLR bit to clear! %d\n",
> +			ret);
> +		return;
> +	}
> +	intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
> +
> +	/* Trigger the actual Driver-FLR */
> +	intel_uncore_rmw_fw(uncore, GU_CNTL, 0, DRIVERFLR);
> +
> +	ret = intel_wait_for_register_fw(uncore, GU_DEBUG,
> +					 DRIVERFLR_STATUS, DRIVERFLR_STATUS,
> +					 flr_timeout_ms);
> +	if (ret) {
> +		drm_err(&i915->drm, "wait for Driver-FLR completion failed! %d\n", ret);
> +		return;
> +	}
> +
> +	intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
> +
> +	return;
> +}
> +
>   /* Called via drm-managed action */
>   void intel_uncore_fini_mmio(struct drm_device *dev, void *data)
>   {
> @@ -2716,6 +2758,9 @@ void intel_uncore_fini_mmio(struct drm_device *dev, void *data)
>   		intel_uncore_fw_domains_fini(uncore);
>   		iosf_mbi_punit_release();
>   	}
> +
> +	if (intel_uncore_needs_flr_on_fini(uncore))
> +		driver_flr(uncore);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
> index 5449146a0624..a9fa0b11e7e4 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.h
> +++ b/drivers/gpu/drm/i915/intel_uncore.h
> @@ -153,6 +153,7 @@ struct intel_uncore {
>   #define UNCORE_HAS_FPGA_DBG_UNCLAIMED	BIT(1)
>   #define UNCORE_HAS_DBG_UNCLAIMED	BIT(2)
>   #define UNCORE_HAS_FIFO			BIT(3)
> +#define UNCORE_NEEDS_FLR_ON_FINI	BIT(3)

Dumb mistake, this should be BIT(4) (and that's why the series is failing on
older gens that don't support the driver FLR). Will wait for comments
before re-spinning.

Daniele

>   
>   	const struct intel_forcewake_range *fw_domains_table;
>   	unsigned int fw_domains_table_entries;
> @@ -223,6 +224,18 @@ intel_uncore_has_fifo(const struct intel_uncore *uncore)
>   	return uncore->flags & UNCORE_HAS_FIFO;
>   }
>   
> +static inline bool
> +intel_uncore_needs_flr_on_fini(const struct intel_uncore *uncore)
> +{
> +	return uncore->flags & UNCORE_NEEDS_FLR_ON_FINI;
> +}
> +
> +static inline bool
> +intel_uncore_set_flr_on_fini(struct intel_uncore *uncore)
> +{
> +	return uncore->flags |= UNCORE_NEEDS_FLR_ON_FINI;
> +}
> +
>   void intel_uncore_mmio_debug_init_early(struct drm_i915_private *i915);
>   void intel_uncore_init_early(struct intel_uncore *uncore,
>   			     struct intel_gt *gt);
Rodrigo Vivi Nov. 22, 2022, 8:46 p.m. UTC | #2
On Mon, Nov 21, 2022 at 03:16:15PM -0800, Daniele Ceraolo Spurio wrote:
> If the GSC was loaded, the only way to stop it during the driver unload
> flow is to do a driver-FLR.
> The driver-FLR is not the same as PCI config space FLR in that
> it doesn't reset the SGUnit and doesn't modify the PCI config
> space. Thus, it doesn't require a re-enumeration of the PCI BARs.
> However, the driver-FLR does cause a memory wipe of graphics memory
> on all discrete GPU platforms or a wipe limited to stolen memory
> on the integrated GPU platforms.

Nothing major or blocking, but a few thoughts:

1. Should we document this in the code, at least in a comment in the
flr function?
2. Should we call this driver_initiated_flr, aiming to reduce even more
the ambiguity of it?

> 
> We perform the FLR as the last action before releasing the MMIO bar, so
> that we don't have to care about the consequences of the reset on the
> unload flow.

3. Should we try to implement this already in the gt_reset case as the
last resort before wedging the gt? So we can already test this flow
on the current platforms?

> 
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c |  9 +++++
>  drivers/gpu/drm/i915/i915_reg.h           |  3 ++
>  drivers/gpu/drm/i915/intel_uncore.c       | 45 +++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_uncore.h       | 13 +++++++
>  4 files changed, 70 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
> index 510fb47193ec..5dad3c19c445 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
> @@ -173,6 +173,15 @@ int intel_gsc_fw_upload(struct intel_gsc_uc *gsc)
>  	if (err)
>  		goto fail;
>  
> +	/*
> +	 * Once the GSC FW is loaded, the only way to kill it on driver unload
> +	 * is to do a driver FLR. Given this is a very disruptive action, we
> +	 * want to do it as the last action before releasing the access to the
> +	 * MMIO bar, which means we need to do it as part of the primary uncore
> +	 * cleanup.
> +	 */
> +	intel_uncore_set_flr_on_fini(&gt->i915->uncore);
> +
>  	/* FW is not fully operational until we enable SW proxy */
>  	intel_uc_fw_change_status(gsc_fw, INTEL_UC_FIRMWARE_TRANSFERRED);
>  
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8e1892d14774..60e55245200b 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -118,6 +118,9 @@
>  
>  #define GU_CNTL				_MMIO(0x101010)
>  #define   LMEM_INIT			REG_BIT(7)
> +#define   DRIVERFLR			REG_BIT(31)
> +#define GU_DEBUG			_MMIO(0x101018)
> +#define   DRIVERFLR_STATUS		REG_BIT(31)
>  
>  #define GEN6_STOLEN_RESERVED		_MMIO(0x1082C0)
>  #define GEN6_STOLEN_RESERVED_ADDR_MASK	(0xFFF << 20)
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 8006a6c61466..c1befa33ff59 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -2703,6 +2703,48 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore,
>  	}
>  }
>  
> +static void driver_flr(struct intel_uncore *uncore)
> +{
> +	struct drm_i915_private *i915 = uncore->i915;
> +	const unsigned int flr_timeout_ms = 3000; /* specs recommend a 3s wait */
> +	int ret;
> +
> +	drm_dbg(&i915->drm, "Triggering Driver-FLR\n");
> +
> +	/*
> +	 * Make sure any pending FLR requests have cleared by waiting for the
> +	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
> +	 * to make sure it's not still set from a prior attempt (it's a write to
> +	 * clear bit).
> +	 * Note that we should never be in a situation where a previous attempt
> +	 * is still pending (unless the HW is totally dead), but better to be
> +	 * safe in case something unexpected happens
> +	 */
> +	ret = intel_wait_for_register_fw(uncore, GU_CNTL, DRIVERFLR, 0, flr_timeout_ms);
> +	if (ret) {
> +		drm_err(&i915->drm,
> +			"Failed to wait for Driver-FLR bit to clear! %d\n",
> +			ret);
> +		return;
> +	}
> +	intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
> +
> +	/* Trigger the actual Driver-FLR */
> +	intel_uncore_rmw_fw(uncore, GU_CNTL, 0, DRIVERFLR);
> +
> +	ret = intel_wait_for_register_fw(uncore, GU_DEBUG,
> +					 DRIVERFLR_STATUS, DRIVERFLR_STATUS,
> +					 flr_timeout_ms);
> +	if (ret) {
> +		drm_err(&i915->drm, "wait for Driver-FLR completion failed! %d\n", ret);
> +		return;
> +	}
> +
> +	intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
> +
> +	return;
> +}
> +
>  /* Called via drm-managed action */
>  void intel_uncore_fini_mmio(struct drm_device *dev, void *data)
>  {
> @@ -2716,6 +2758,9 @@ void intel_uncore_fini_mmio(struct drm_device *dev, void *data)
>  		intel_uncore_fw_domains_fini(uncore);
>  		iosf_mbi_punit_release();
>  	}
> +
> +	if (intel_uncore_needs_flr_on_fini(uncore))
> +		driver_flr(uncore);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
> index 5449146a0624..a9fa0b11e7e4 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.h
> +++ b/drivers/gpu/drm/i915/intel_uncore.h
> @@ -153,6 +153,7 @@ struct intel_uncore {
>  #define UNCORE_HAS_FPGA_DBG_UNCLAIMED	BIT(1)
>  #define UNCORE_HAS_DBG_UNCLAIMED	BIT(2)
>  #define UNCORE_HAS_FIFO			BIT(3)
> +#define UNCORE_NEEDS_FLR_ON_FINI	BIT(3)
>  
>  	const struct intel_forcewake_range *fw_domains_table;
>  	unsigned int fw_domains_table_entries;
> @@ -223,6 +224,18 @@ intel_uncore_has_fifo(const struct intel_uncore *uncore)
>  	return uncore->flags & UNCORE_HAS_FIFO;
>  }
>  
> +static inline bool
> +intel_uncore_needs_flr_on_fini(const struct intel_uncore *uncore)
> +{
> +	return uncore->flags & UNCORE_NEEDS_FLR_ON_FINI;
> +}
> +
> +static inline bool
> +intel_uncore_set_flr_on_fini(struct intel_uncore *uncore)
> +{
> +	return uncore->flags |= UNCORE_NEEDS_FLR_ON_FINI;
> +}
> +
>  void intel_uncore_mmio_debug_init_early(struct drm_i915_private *i915);
>  void intel_uncore_init_early(struct intel_uncore *uncore,
>  			     struct intel_gt *gt);
> -- 
> 2.37.3
>
Daniele Ceraolo Spurio Nov. 22, 2022, 10:50 p.m. UTC | #3
On 11/22/2022 12:46 PM, Rodrigo Vivi wrote:
> On Mon, Nov 21, 2022 at 03:16:15PM -0800, Daniele Ceraolo Spurio wrote:
>> If the GSC was loaded, the only way to stop it during the driver unload
>> flow is to do a driver-FLR.
>> The driver-FLR is not the same as PCI config space FLR in that
>> it doesn't reset the SGUnit and doesn't modify the PCI config
>> space. Thus, it doesn't require a re-enumeration of the PCI BARs.
>> However, the driver-FLR does cause a memory wipe of graphics memory
>> on all discrete GPU platforms or a wipe limited to stolen memory
>> on the integrated GPU platforms.
> Nothing major or blocking, but a few thoughts:
>
> 1. Should we document this in the code, at least in a comment in the
> flr function?

Sure, I'll add it in

> 2. Should we call this driver_initiated_flr, aiming to reduce even more
> the ambiguity of it?

ok

>
>> We perform the FLR as the last action before releasing the MMIO bar, so
>> that we don't have to care about the consequences of the reset on the
>> unload flow.
> 3. should we try to implement this already in the gt_reset case as the
> last resrouce before wedging the gt? So we can already test this flow
> in the current platforms?

This would be nice to have, but very complicated to implement. The fact 
that FLR kills everything on the system, including resetting display and 
wiping LMEM, means that we would need a new recovery path to 
re-initialize all components. There are also potential questions on how 
to handle LMEM: do we try to migrate it to SMEM before triggering the 
FLR (potentially via CPU memcpy if the GT is dead), or do we just let it 
get wiped?

The reason why I wanted the FLR to be the very last thing before 
releasing MMIO access was exactly to not have to care about the recovery 
path ;)

Daniele

>
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c |  9 +++++
>>   drivers/gpu/drm/i915/i915_reg.h           |  3 ++
>>   drivers/gpu/drm/i915/intel_uncore.c       | 45 +++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_uncore.h       | 13 +++++++
>>   4 files changed, 70 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
>> index 510fb47193ec..5dad3c19c445 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
>> @@ -173,6 +173,15 @@ int intel_gsc_fw_upload(struct intel_gsc_uc *gsc)
>>   	if (err)
>>   		goto fail;
>>   
>> +	/*
>> +	 * Once the GSC FW is loaded, the only way to kill it on driver unload
>> +	 * is to do a driver FLR. Given this is a very disruptive action, we
>> +	 * want to do it as the last action before releasing the access to the
>> +	 * MMIO bar, which means we need to do it as part of the primary uncore
>> +	 * cleanup.
>> +	 */
>> +	intel_uncore_set_flr_on_fini(&gt->i915->uncore);
>> +
>>   	/* FW is not fully operational until we enable SW proxy */
>>   	intel_uc_fw_change_status(gsc_fw, INTEL_UC_FIRMWARE_TRANSFERRED);
>>   
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index 8e1892d14774..60e55245200b 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -118,6 +118,9 @@
>>   
>>   #define GU_CNTL				_MMIO(0x101010)
>>   #define   LMEM_INIT			REG_BIT(7)
>> +#define   DRIVERFLR			REG_BIT(31)
>> +#define GU_DEBUG			_MMIO(0x101018)
>> +#define   DRIVERFLR_STATUS		REG_BIT(31)
>>   
>>   #define GEN6_STOLEN_RESERVED		_MMIO(0x1082C0)
>>   #define GEN6_STOLEN_RESERVED_ADDR_MASK	(0xFFF << 20)
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
>> index 8006a6c61466..c1befa33ff59 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -2703,6 +2703,48 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore,
>>   	}
>>   }
>>   
>> +static void driver_flr(struct intel_uncore *uncore)
>> +{
>> +	struct drm_i915_private *i915 = uncore->i915;
>> +	const unsigned int flr_timeout_ms = 3000; /* specs recommend a 3s wait */
>> +	int ret;
>> +
>> +	drm_dbg(&i915->drm, "Triggering Driver-FLR\n");
>> +
>> +	/*
>> +	 * Make sure any pending FLR requests have cleared by waiting for the
>> +	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
>> +	 * to make sure it's not still set from a prior attempt (it's a write to
>> +	 * clear bit).
>> +	 * Note that we should never be in a situation where a previous attempt
>> +	 * is still pending (unless the HW is totally dead), but better to be
>> +	 * safe in case something unexpected happens
>> +	 */
>> +	ret = intel_wait_for_register_fw(uncore, GU_CNTL, DRIVERFLR, 0, flr_timeout_ms);
>> +	if (ret) {
>> +		drm_err(&i915->drm,
>> +			"Failed to wait for Driver-FLR bit to clear! %d\n",
>> +			ret);
>> +		return;
>> +	}
>> +	intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
>> +
>> +	/* Trigger the actual Driver-FLR */
>> +	intel_uncore_rmw_fw(uncore, GU_CNTL, 0, DRIVERFLR);
>> +
>> +	ret = intel_wait_for_register_fw(uncore, GU_DEBUG,
>> +					 DRIVERFLR_STATUS, DRIVERFLR_STATUS,
>> +					 flr_timeout_ms);
>> +	if (ret) {
>> +		drm_err(&i915->drm, "wait for Driver-FLR completion failed! %d\n", ret);
>> +		return;
>> +	}
>> +
>> +	intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
>> +
>> +	return;
>> +}
>> +
>>   /* Called via drm-managed action */
>>   void intel_uncore_fini_mmio(struct drm_device *dev, void *data)
>>   {
>> @@ -2716,6 +2758,9 @@ void intel_uncore_fini_mmio(struct drm_device *dev, void *data)
>>   		intel_uncore_fw_domains_fini(uncore);
>>   		iosf_mbi_punit_release();
>>   	}
>> +
>> +	if (intel_uncore_needs_flr_on_fini(uncore))
>> +		driver_flr(uncore);
>>   }
>>   
>>   /**
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
>> index 5449146a0624..a9fa0b11e7e4 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.h
>> +++ b/drivers/gpu/drm/i915/intel_uncore.h
>> @@ -153,6 +153,7 @@ struct intel_uncore {
>>   #define UNCORE_HAS_FPGA_DBG_UNCLAIMED	BIT(1)
>>   #define UNCORE_HAS_DBG_UNCLAIMED	BIT(2)
>>   #define UNCORE_HAS_FIFO			BIT(3)
>> +#define UNCORE_NEEDS_FLR_ON_FINI	BIT(3)
>>   
>>   	const struct intel_forcewake_range *fw_domains_table;
>>   	unsigned int fw_domains_table_entries;
>> @@ -223,6 +224,18 @@ intel_uncore_has_fifo(const struct intel_uncore *uncore)
>>   	return uncore->flags & UNCORE_HAS_FIFO;
>>   }
>>   
>> +static inline bool
>> +intel_uncore_needs_flr_on_fini(const struct intel_uncore *uncore)
>> +{
>> +	return uncore->flags & UNCORE_NEEDS_FLR_ON_FINI;
>> +}
>> +
>> +static inline bool
>> +intel_uncore_set_flr_on_fini(struct intel_uncore *uncore)
>> +{
>> +	return uncore->flags |= UNCORE_NEEDS_FLR_ON_FINI;
>> +}
>> +
>>   void intel_uncore_mmio_debug_init_early(struct drm_i915_private *i915);
>>   void intel_uncore_init_early(struct intel_uncore *uncore,
>>   			     struct intel_gt *gt);
>> -- 
>> 2.37.3
>>
Rodrigo Vivi Nov. 23, 2022, 6:32 p.m. UTC | #4
On Tue, Nov 22, 2022 at 02:50:17PM -0800, Ceraolo Spurio, Daniele wrote:
> 
> 
> On 11/22/2022 12:46 PM, Rodrigo Vivi wrote:
> > On Mon, Nov 21, 2022 at 03:16:15PM -0800, Daniele Ceraolo Spurio wrote:
> > > If the GSC was loaded, the only way to stop it during the driver unload
> > > flow is to do a driver-FLR.
> > > The driver-FLR is not the same as PCI config space FLR in that
> > > it doesn't reset the SGUnit and doesn't modify the PCI config
> > > space. Thus, it doesn't require a re-enumeration of the PCI BARs.
> > > However, the driver-FLR does cause a memory wipe of graphics memory
> > > on all discrete GPU platforms or a wipe limited to stolen memory
> > > on the integrated GPU platforms.
> > Nothing major or blocking, but a few thoughts:
> > 
> > 1. Should we document this in the code, at least in a comment in the
> > flr function?
> 
> Sure, I'll add it in
> 
> > 2. Should we call this driver_initiated_flr, aiming to reduce even more
> > the ambiguity of it?
> 
> ok
> 
> > 
> > > We perform the FLR as the last action before releasing the MMIO bar, so
> > > that we don't have to care about the consequences of the reset on the
> > > unload flow.
> > 3. should we try to implement this already in the gt_reset case as the
> > last resrouce before wedging the gt? So we can already test this flow
> > in the current platforms?
> 
> This would be nice to have, but very complicated to implement. The fact that
> FLR kills everything on the system, including resetting display and wiping
> LMEM, means that we would need a new recovery path to re-initialize all
> components. There are also potential questions on how to handle LMEM: do we
> try to migrate it to SMEM before triggering the FLR (potentially via CPU
> memcpy if the GT is dead), or do we just let it get wiped?
> 
> The reason why I wanted the FLR to be the very last thing before releasing
> MMIO access was exactly to not have to care about the recovery path ;)

it makes sense indeed.

> 
> Daniele
> 
> > 
> > > Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > > Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com>
> > > ---
> > >   drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c |  9 +++++
> > >   drivers/gpu/drm/i915/i915_reg.h           |  3 ++
> > >   drivers/gpu/drm/i915/intel_uncore.c       | 45 +++++++++++++++++++++++
> > >   drivers/gpu/drm/i915/intel_uncore.h       | 13 +++++++
> > >   4 files changed, 70 insertions(+)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
> > > index 510fb47193ec..5dad3c19c445 100644
> > > --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
> > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
> > > @@ -173,6 +173,15 @@ int intel_gsc_fw_upload(struct intel_gsc_uc *gsc)
> > >   	if (err)
> > >   		goto fail;
> > > +	/*
> > > +	 * Once the GSC FW is loaded, the only way to kill it on driver unload
> > > +	 * is to do a driver FLR. Given this is a very disruptive action, we
> > > +	 * want to do it as the last action before releasing the access to the
> > > +	 * MMIO bar, which means we need to do it as part of the primary uncore
> > > +	 * cleanup.
> > > +	 */
> > > +	intel_uncore_set_flr_on_fini(&gt->i915->uncore);
> > > +
> > >   	/* FW is not fully operational until we enable SW proxy */
> > >   	intel_uc_fw_change_status(gsc_fw, INTEL_UC_FIRMWARE_TRANSFERRED);
> > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > > index 8e1892d14774..60e55245200b 100644
> > > --- a/drivers/gpu/drm/i915/i915_reg.h
> > > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > > @@ -118,6 +118,9 @@
> > >   #define GU_CNTL				_MMIO(0x101010)
> > >   #define   LMEM_INIT			REG_BIT(7)
> > > +#define   DRIVERFLR			REG_BIT(31)
> > > +#define GU_DEBUG			_MMIO(0x101018)
> > > +#define   DRIVERFLR_STATUS		REG_BIT(31)
> > >   #define GEN6_STOLEN_RESERVED		_MMIO(0x1082C0)
> > >   #define GEN6_STOLEN_RESERVED_ADDR_MASK	(0xFFF << 20)
> > > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> > > index 8006a6c61466..c1befa33ff59 100644
> > > --- a/drivers/gpu/drm/i915/intel_uncore.c
> > > +++ b/drivers/gpu/drm/i915/intel_uncore.c
> > > @@ -2703,6 +2703,48 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore,
> > >   	}
> > >   }
> > > +static void driver_flr(struct intel_uncore *uncore)
> > > +{
> > > +	struct drm_i915_private *i915 = uncore->i915;
> > > +	const unsigned int flr_timeout_ms = 3000; /* specs recommend a 3s wait */
> > > +	int ret;
> > > +
> > > +	drm_dbg(&i915->drm, "Triggering Driver-FLR\n");
> > > +
> > > +	/*
> > > +	 * Make sure any pending FLR requests have cleared by waiting for the
> > > +	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
> > > +	 * to make sure it's not still set from a prior attempt (it's a write to
> > > +	 * clear bit).
> > > +	 * Note that we should never be in a situation where a previous attempt
> > > +	 * is still pending (unless the HW is totally dead), but better to be
> > > +	 * safe in case something unexpected happens
> > > +	 */
> > > +	ret = intel_wait_for_register_fw(uncore, GU_CNTL, DRIVERFLR, 0, flr_timeout_ms);
> > > +	if (ret) {
> > > +		drm_err(&i915->drm,
> > > +			"Failed to wait for Driver-FLR bit to clear! %d\n",
> > > +			ret);
> > > +		return;
> > > +	}
> > > +	intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
> > > +
> > > +	/* Trigger the actual Driver-FLR */
> > > +	intel_uncore_rmw_fw(uncore, GU_CNTL, 0, DRIVERFLR);
> > > +
> > > +	ret = intel_wait_for_register_fw(uncore, GU_DEBUG,
> > > +					 DRIVERFLR_STATUS, DRIVERFLR_STATUS,
> > > +					 flr_timeout_ms);
> > > +	if (ret) {
> > > +		drm_err(&i915->drm, "wait for Driver-FLR completion failed! %d\n", ret);
> > > +		return;
> > > +	}
> > > +
> > > +	intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
> > > +
> > > +	return;
> > > +}
> > > +
> > >   /* Called via drm-managed action */
> > >   void intel_uncore_fini_mmio(struct drm_device *dev, void *data)
> > >   {
> > > @@ -2716,6 +2758,9 @@ void intel_uncore_fini_mmio(struct drm_device *dev, void *data)
> > >   		intel_uncore_fw_domains_fini(uncore);
> > >   		iosf_mbi_punit_release();
> > >   	}
> > > +
> > > +	if (intel_uncore_needs_flr_on_fini(uncore))
> > > +		driver_flr(uncore);
> > >   }
> > >   /**
> > > diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
> > > index 5449146a0624..a9fa0b11e7e4 100644
> > > --- a/drivers/gpu/drm/i915/intel_uncore.h
> > > +++ b/drivers/gpu/drm/i915/intel_uncore.h
> > > @@ -153,6 +153,7 @@ struct intel_uncore {
> > >   #define UNCORE_HAS_FPGA_DBG_UNCLAIMED	BIT(1)
> > >   #define UNCORE_HAS_DBG_UNCLAIMED	BIT(2)
> > >   #define UNCORE_HAS_FIFO			BIT(3)
> > > +#define UNCORE_NEEDS_FLR_ON_FINI	BIT(3)
> > >   	const struct intel_forcewake_range *fw_domains_table;
> > >   	unsigned int fw_domains_table_entries;
> > > @@ -223,6 +224,18 @@ intel_uncore_has_fifo(const struct intel_uncore *uncore)
> > >   	return uncore->flags & UNCORE_HAS_FIFO;
> > >   }
> > > +static inline bool
> > > +intel_uncore_needs_flr_on_fini(const struct intel_uncore *uncore)
> > > +{
> > > +	return uncore->flags & UNCORE_NEEDS_FLR_ON_FINI;
> > > +}
> > > +
> > > +static inline bool
> > > +intel_uncore_set_flr_on_fini(struct intel_uncore *uncore)
> > > +{
> > > +	return uncore->flags |= UNCORE_NEEDS_FLR_ON_FINI;
> > > +}
> > > +
> > >   void intel_uncore_mmio_debug_init_early(struct drm_i915_private *i915);
> > >   void intel_uncore_init_early(struct intel_uncore *uncore,
> > >   			     struct intel_gt *gt);
> > > -- 
> > > 2.37.3
> > > 
>
Alan Previn Dec. 1, 2022, 10:40 p.m. UTC | #5
Few nits - most of which are repeats from existing review comments.
I did have 1 feedback. Functionally, code logic is correct.

To speed things up, I'll provide a conditional R-b if you address the feedback below and apply the BIT(3)-to-BIT(4)
uncore-flags fix. The others are nits in my book:
(conditional) Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>


On Mon, 2022-11-21 at 15:16 -0800, Ceraolo Spurio, Daniele wrote:
> If the GSC was loaded, the only way to stop it during the driver unload
> flow is to do a driver-FLR.
> The driver-FLR is not the same as PCI config space FLR in that
> it doesn't reset the SGUnit and doesn't modify the PCI config
> space. Thus, it doesn't require a re-enumeration of the PCI BARs.
> However, the driver-FLR does cause a memory wipe of graphics memory
> on all discrete GPU platforms or a wipe limited to stolen memory
> on the integrated GPU platforms.


Alan: [snip]


> > +	/*
> +	 * Once the GSC FW is loaded, the only way to kill it on driver unload
> +	 * is to do a driver FLR. Given this is a very disruptive action, we
> +	 * want to do it as the last action before releasing the access to the
> +	 * MMIO bar, which means we need to do it as part of the primary uncore
> +	 * cleanup.
> +	 */
> +	intel_uncore_set_flr_on_fini(&gt->i915->uncore);

Alan: Nit: Perhaps define what "disruptive" means (i.e. the whole memory-wiping impact) - this aligns with what Rodrigo
commented, I think?

Alan: Nit: It might be important for developers debugging issues to state (in comments) that the security FW has been
provided with dynamically allocated memory, which is why it MUST be killed on unload (unlike prior Gen SOCs).

Alan: Feedback: I think intel_uncore_set_flr_on_fini should be called before gsc_fw_load() (or after, but still called if
loading failed with an error indicating the instruction was already sent, such as the timeout error, before the bail).
This would better ensure a clean slate is set upon unload even if the GSC firmware load was only attempted.

Alan: [snip]


> +	/*
> +	 * Make sure any pending FLR requests have cleared by waiting for the
> +	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
> +	 * to make sure it's not still set from a prior attempt (it's a write to
> +	 * clear bit).
> +	 * Note that we should never be in a situation where a previous attempt
> +	 * is still pending (unless the HW is totally dead), but better to be
> +	 * safe in case something unexpected happens
> +	 */
> +	ret = intel_wait_for_register_fw(uncore, GU_CNTL, DRIVERFLR, 0, flr_timeout_ms);
> +	if (ret) {
> +		drm_err(&i915->drm,
> +			"Failed to wait for Driver-FLR bit to clear! %d\n",
> +			ret);
> +		return;
> +	}
> +	intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
> +
Alan: Nit: with the current definition of MTL registers, nothing is wrong with above code but for the sake of code-
intent-readability, perhaps better to use intel_uncore_rmw_fw on above too.

Alan: [snip]

> @@ -153,6 +153,7 @@ struct intel_uncore {
>  #define UNCORE_HAS_FPGA_DBG_UNCLAIMED	BIT(1)
>  #define UNCORE_HAS_DBG_UNCLAIMED	BIT(2)
>  #define UNCORE_HAS_FIFO			BIT(3)
> +#define UNCORE_NEEDS_FLR_ON_FINI	BIT(3)
>  
Alan: Fix: yeah - this needs to be BIT(4) - you already caught that.
Daniele Ceraolo Spurio Dec. 1, 2022, 10:52 p.m. UTC | #6
On 12/1/2022 2:40 PM, Teres Alexis, Alan Previn wrote:
> Few nits - most of which are repeats from existing review comments.
> I did have 1 feedback. Functionally, code logic is correct.
>
> To speed things up, I'll provide a conditional R-b if you address the feedback below + fix the the BIT3->to-BIT4 uncore-
> flags fix. Others are nits in my book:
> (conditional) Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
>
>
> On Mon, 2022-11-21 at 15:16 -0800, Ceraolo Spurio, Daniele wrote:
>> If the GSC was loaded, the only way to stop it during the driver unload
>> flow is to do a driver-FLR.
>> The driver-FLR is not the same as PCI config space FLR in that
>> it doesn't reset the SGUnit and doesn't modify the PCI config
>> space. Thus, it doesn't require a re-enumeration of the PCI BARs.
>> However, the driver-FLR does cause a memory wipe of graphics memory
>> on all discrete GPU platforms or a wipe limited to stolen memory
>> on the integrated GPU platforms.
>
> Alan: [snip]
>
>
>>> +	/*
>> +	 * Once the GSC FW is loaded, the only way to kill it on driver unload
>> +	 * is to do a driver FLR. Given this is a very disruptive action, we
>> +	 * want to do it as the last action before releasing the access to the
>> +	 * MMIO bar, which means we need to do it as part of the primary uncore
>> +	 * cleanup.
>> +	 */
>> +	intel_uncore_set_flr_on_fini(&gt->i915->uncore);
> Alan: Nit: Perhaps define what disruptive (i.e. the whole memory wiping impact) - aligns with what Rodrigo commented i
> think?

I'll add it in the FLR function and refer to that one

>
> Alan: Nit: Might be important for developers debugging issues to state (in comments) that the security FW has been
> provided a dynamically allocated memory which is why it MUST be killed on unload (unlike prior Gen SOCs).
>
> Alan: Feedback: I think intel_uncore_set_flr_on_fini should called before gsc_fw_load() (or after but still called if
> loading failed with and error indicating the instruction was already sent such as the timeout error, before the bail).
> This would be better to ensure a clean slate is set upon unload even if gsc firmware was attempted to get loaded.

Ok, I'll move it to before.

>
> Alan: [snip]
>
>
>> +	/*
>> +	 * Make sure any pending FLR requests have cleared by waiting for the
>> +	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
>> +	 * to make sure it's not still set from a prior attempt (it's a write to
>> +	 * clear bit).
>> +	 * Note that we should never be in a situation where a previous attempt
>> +	 * is still pending (unless the HW is totally dead), but better to be
>> +	 * safe in case something unexpected happens
>> +	 */
>> +	ret = intel_wait_for_register_fw(uncore, GU_CNTL, DRIVERFLR, 0, flr_timeout_ms);
>> +	if (ret) {
>> +		drm_err(&i915->drm,
>> +			"Failed to wait for Driver-FLR bit to clear! %d\n",
>> +			ret);
>> +		return;
>> +	}
>> +	intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
>> +
> Alan: Nit: with the current definition of MTL registers, nothing is wrong with above code but for the sake of code-
> intent-readability, perhaps better to use intel_uncore_rmw_fw on above too.

This can't be a rmw: this register has a bunch of bits that are
write-to-clear/take-action, so we must write only the FLR bit.

Daniele

>
> Alan: [snip]
>
>> @@ -153,6 +153,7 @@ struct intel_uncore {
>>   #define UNCORE_HAS_FPGA_DBG_UNCLAIMED	BIT(1)
>>   #define UNCORE_HAS_DBG_UNCLAIMED	BIT(2)
>>   #define UNCORE_HAS_FIFO			BIT(3)
>> +#define UNCORE_NEEDS_FLR_ON_FINI	BIT(3)
>>   
> Alan: Fix: yeah - this needs to be 4 - u already caught that.
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
index 510fb47193ec..5dad3c19c445 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
@@ -173,6 +173,15 @@  int intel_gsc_fw_upload(struct intel_gsc_uc *gsc)
 	if (err)
 		goto fail;
 
+	/*
+	 * Once the GSC FW is loaded, the only way to kill it on driver unload
+	 * is to do a driver FLR. Given this is a very disruptive action, we
+	 * want to do it as the last action before releasing the access to the
+	 * MMIO bar, which means we need to do it as part of the primary uncore
+	 * cleanup.
+	 */
+	intel_uncore_set_flr_on_fini(&gt->i915->uncore);
+
 	/* FW is not fully operational until we enable SW proxy */
 	intel_uc_fw_change_status(gsc_fw, INTEL_UC_FIRMWARE_TRANSFERRED);
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8e1892d14774..60e55245200b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -118,6 +118,9 @@ 
 
 #define GU_CNTL				_MMIO(0x101010)
 #define   LMEM_INIT			REG_BIT(7)
+#define   DRIVERFLR			REG_BIT(31)
+#define GU_DEBUG			_MMIO(0x101018)
+#define   DRIVERFLR_STATUS		REG_BIT(31)
 
 #define GEN6_STOLEN_RESERVED		_MMIO(0x1082C0)
 #define GEN6_STOLEN_RESERVED_ADDR_MASK	(0xFFF << 20)
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 8006a6c61466..c1befa33ff59 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -2703,6 +2703,48 @@  void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore,
 	}
 }
 
+/* Trigger a driver-FLR and wait for GU_DEBUG to report its completion. */
+static void driver_flr(struct intel_uncore *uncore)
+{
+	struct drm_i915_private *i915 = uncore->i915;
+	const unsigned int flr_timeout_ms = 3000; /* specs recommend a 3s wait */
+	int ret;
+
+	drm_dbg(&i915->drm, "Triggering Driver-FLR\n");
+
+	/*
+	 * Make sure any pending FLR requests have cleared by waiting for the
+	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
+	 * to make sure it's not still set from a prior attempt (it's a write to
+	 * clear bit).
+	 * Note that we should never be in a situation where a previous attempt
+	 * is still pending (unless the HW is totally dead), but better to be
+	 * safe in case something unexpected happens
+	 */
+	ret = intel_wait_for_register_fw(uncore, GU_CNTL, DRIVERFLR, 0, flr_timeout_ms);
+	if (ret) {
+		drm_err(&i915->drm,
+			"Failed to wait for Driver-FLR bit to clear! %d\n", ret);
+		return;
+	}
+	intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
+
+	/* Trigger the actual Driver-FLR */
+	intel_uncore_rmw_fw(uncore, GU_CNTL, 0, DRIVERFLR);
+
+	/* Completion is reported through GU_DEBUG's DRIVERFLR_STATUS bit */
+	ret = intel_wait_for_register_fw(uncore, GU_DEBUG,
+					 DRIVERFLR_STATUS, DRIVERFLR_STATUS,
+					 flr_timeout_ms);
+	if (ret) {
+		drm_err(&i915->drm, "wait for Driver-FLR completion failed! %d\n", ret);
+		return;
+	}
+
+	/* Write-to-clear the status so it is not left set for a later attempt */
+	intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
+}
+
 /* Called via drm-managed action */
 void intel_uncore_fini_mmio(struct drm_device *dev, void *data)
 {
@@ -2716,6 +2758,9 @@  void intel_uncore_fini_mmio(struct drm_device *dev, void *data)
 		intel_uncore_fw_domains_fini(uncore);
 		iosf_mbi_punit_release();
 	}
+
+	if (intel_uncore_needs_flr_on_fini(uncore))
+		driver_flr(uncore);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index 5449146a0624..a9fa0b11e7e4 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -153,6 +153,7 @@  struct intel_uncore {
 #define UNCORE_HAS_FPGA_DBG_UNCLAIMED	BIT(1)
 #define UNCORE_HAS_DBG_UNCLAIMED	BIT(2)
 #define UNCORE_HAS_FIFO			BIT(3)
+#define UNCORE_NEEDS_FLR_ON_FINI	BIT(4) /* run driver-FLR from intel_uncore_fini_mmio() */
 
 	const struct intel_forcewake_range *fw_domains_table;
 	unsigned int fw_domains_table_entries;
@@ -223,6 +224,18 @@  intel_uncore_has_fifo(const struct intel_uncore *uncore)
 	return uncore->flags & UNCORE_HAS_FIFO;
 }
 
+static inline bool
+intel_uncore_needs_flr_on_fini(const struct intel_uncore *uncore)
+{
+	return uncore->flags & UNCORE_NEEDS_FLR_ON_FINI; /* set by intel_uncore_set_flr_on_fini() */
+}
+
+static inline bool
+intel_uncore_set_flr_on_fini(struct intel_uncore *uncore)
+{
+	return uncore->flags |= UNCORE_NEEDS_FLR_ON_FINI; /* always true: the flag bit is nonzero */
+}
+
 void intel_uncore_mmio_debug_init_early(struct drm_i915_private *i915);
 void intel_uncore_init_early(struct intel_uncore *uncore,
 			     struct intel_gt *gt);