diff mbox series

[2/2] coresight: tmc: Add shutdown callback for TMC ETR/ETF

Message ID 28123d1e19f235f97555ee36a5ed8b52d20cbdea.1590947174.git.saiprakash.ranjan@codeaurora.org (mailing list archive)
State New, archived
Headers show
Series coresight: tmc: Add shutdown callback for TMC ETR/ETF | expand

Commit Message

Sai Prakash Ranjan June 1, 2020, 8:02 a.m. UTC
Implement a shutdown callback to ensure ETR/ETF hardware is
properly shutdown in reboot/shutdown path. This is required
for ETR/ETF which has SMMU address translation enabled like
on SC7180 SoC and few others. If the hardware is still accessing
memory after SMMU translation is disabled as part of SMMU
shutdown callback in system reboot or shutdown path, then
IOVAs(I/O virtual address) which it was using will go on the bus
as the physical addresses which might result in unknown crashes
(NoC/interconnect errors). So we make sure from this shutdown
callback that the ETR/ETF is shutdown before SMMU translation is
disabled and device_link in SMMU driver will take care of ordering
of shutdown callbacks such that SMMU shutdown callback is not
called before any of its consumer shutdown callbacks.

Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
---
 .../hwtracing/coresight/coresight-tmc-etf.c   |  4 +--
 .../hwtracing/coresight/coresight-tmc-etr.c   |  2 +-
 drivers/hwtracing/coresight/coresight-tmc.c   | 29 +++++++++++++++++++
 drivers/hwtracing/coresight/coresight-tmc.h   |  3 ++
 4 files changed, 35 insertions(+), 3 deletions(-)

Comments

Mike Leach June 1, 2020, 1:35 p.m. UTC | #1
Hi,

On Mon, 1 Jun 2020 at 09:02, Sai Prakash Ranjan
<saiprakash.ranjan@codeaurora.org> wrote:
>
> Implement a shutdown callback to ensure ETR/ETF hardware is
> properly shutdown in reboot/shutdown path. This is required
> for ETR/ETF which has SMMU address translation enabled like
> on SC7180 SoC and few others. If the hardware is still accessing
> memory after SMMU translation is disabled as part of SMMU
> shutdown callback in system reboot or shutdown path, then
> IOVAs(I/O virtual address) which it was using will go on the bus
> as the physical addresses which might result in unknown crashes
> (NoC/interconnect errors). So we make sure from this shutdown
> callback that the ETR/ETF is shutdown before SMMU translation is
> disabled and device_link in SMMU driver will take care of ordering
> of shutdown callbacks such that SMMU shutdown callback is not
> called before any of its consumer shutdown callbacks.
>
> Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
> ---
>  .../hwtracing/coresight/coresight-tmc-etf.c   |  4 +--
>  .../hwtracing/coresight/coresight-tmc-etr.c   |  2 +-
>  drivers/hwtracing/coresight/coresight-tmc.c   | 29 +++++++++++++++++++
>  drivers/hwtracing/coresight/coresight-tmc.h   |  3 ++
>  4 files changed, 35 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
> index 36cce2bfb744..cba3e7592820 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
> +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
> @@ -85,7 +85,7 @@ static void __tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
>         CS_LOCK(drvdata->base);
>  }
>
> -static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
> +void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
>  {
>         __tmc_etb_disable_hw(drvdata);
>         coresight_disclaim_device(drvdata->base);
> @@ -118,7 +118,7 @@ static int tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
>         return 0;
>  }
>
> -static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
> +void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
>  {
>         CS_UNLOCK(drvdata->base);
>
> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> index 625882bc8b08..b29c2db94d96 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
> +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> @@ -1110,7 +1110,7 @@ static void __tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
>
>  }
>
> -static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
> +void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
>  {
>         __tmc_etr_disable_hw(drvdata);
>         /* Disable CATU device if this ETR is connected to one */
> diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c
> index 5a271ebc4585..7e687a356fe0 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc.c
> +++ b/drivers/hwtracing/coresight/coresight-tmc.c
> @@ -540,6 +540,34 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id)
>         return ret;
>  }
>
> +static void tmc_shutdown(struct amba_device *adev)
> +{
> +       struct tmc_drvdata *drvdata = amba_get_drvdata(adev);
> +

Take drvdata->spinlock here? The tmc_xxx_disable_hw functions are
normally called with the spinlock claimed.

> +       if (!drvdata->enable)

As per previous patch drvdata->mode can be used here.

Regards

Mike

> +               goto out;
> +
> +       /*
> +        * We do not care about the active trace sessions here
> +        * since the system is going down unlike remove callback,
> +        * just make sure that the hardware is shutdown.
> +        */
> +       switch (drvdata->config_type) {
> +       case TMC_CONFIG_TYPE_ETB:
> +               tmc_etb_disable_hw(drvdata);
> +               break;
> +       case TMC_CONFIG_TYPE_ETF:
> +               tmc_etf_disable_hw(drvdata);
> +               break;
> +       case TMC_CONFIG_TYPE_ETR:
> +               tmc_etr_disable_hw(drvdata);
> +       }
> +
> +out:
> +       misc_deregister(&drvdata->miscdev);
> +       coresight_unregister(drvdata->csdev);
> +}
> +
>  static const struct amba_id tmc_ids[] = {
>         CS_AMBA_ID(0x000bb961),
>         /* Coresight SoC 600 TMC-ETR/ETS */
> @@ -558,6 +586,7 @@ static struct amba_driver tmc_driver = {
>                 .suppress_bind_attrs = true,
>         },
>         .probe          = tmc_probe,
> +       .shutdown       = tmc_shutdown,
>         .id_table       = tmc_ids,
>  };
>  builtin_amba_driver(tmc_driver);
> diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
> index d156860495c7..f4f56c474e58 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc.h
> +++ b/drivers/hwtracing/coresight/coresight-tmc.h
> @@ -262,6 +262,8 @@ u32 tmc_get_memwidth_mask(struct tmc_drvdata *drvdata);
>  /* ETB/ETF functions */
>  int tmc_read_prepare_etb(struct tmc_drvdata *drvdata);
>  int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata);
> +void tmc_etb_disable_hw(struct tmc_drvdata *drvdata);
> +void tmc_etf_disable_hw(struct tmc_drvdata *drvdata);
>  extern const struct coresight_ops tmc_etb_cs_ops;
>  extern const struct coresight_ops tmc_etf_cs_ops;
>
> @@ -270,6 +272,7 @@ ssize_t tmc_etb_get_sysfs_trace(struct tmc_drvdata *drvdata,
>  /* ETR functions */
>  int tmc_read_prepare_etr(struct tmc_drvdata *drvdata);
>  int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata);
> +void tmc_etr_disable_hw(struct tmc_drvdata *drvdata);
>  extern const struct coresight_ops tmc_etr_cs_ops;
>  ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata,
>                                 loff_t pos, size_t len, char **bufpp);
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> of Code Aurora Forum, hosted by The Linux Foundation
>
Sai Prakash Ranjan June 1, 2020, 5:15 p.m. UTC | #2
Hi Mike,

Thanks for the review.

On 2020-06-01 19:05, Mike Leach wrote:
> Hi,
> 
> On Mon, 1 Jun 2020 at 09:02, Sai Prakash Ranjan
> <saiprakash.ranjan@codeaurora.org> wrote:
>> 
>> Implement a shutdown callback to ensure ETR/ETF hardware is
>> properly shutdown in reboot/shutdown path. This is required
>> for ETR/ETF which has SMMU address translation enabled like
>> on SC7180 SoC and few others. If the hardware is still accessing
>> memory after SMMU translation is disabled as part of SMMU
>> shutdown callback in system reboot or shutdown path, then
>> IOVAs(I/O virtual address) which it was using will go on the bus
>> as the physical addresses which might result in unknown crashes
>> (NoC/interconnect errors). So we make sure from this shutdown
>> callback that the ETR/ETF is shutdown before SMMU translation is
>> disabled and device_link in SMMU driver will take care of ordering
>> of shutdown callbacks such that SMMU shutdown callback is not
>> called before any of its consumer shutdown callbacks.
>> 
>> Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
>> ---
>>  .../hwtracing/coresight/coresight-tmc-etf.c   |  4 +--
>>  .../hwtracing/coresight/coresight-tmc-etr.c   |  2 +-
>>  drivers/hwtracing/coresight/coresight-tmc.c   | 29 
>> +++++++++++++++++++
>>  drivers/hwtracing/coresight/coresight-tmc.h   |  3 ++
>>  4 files changed, 35 insertions(+), 3 deletions(-)
>> 
>> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c 
>> b/drivers/hwtracing/coresight/coresight-tmc-etf.c
>> index 36cce2bfb744..cba3e7592820 100644
>> --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
>> +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
>> @@ -85,7 +85,7 @@ static void __tmc_etb_disable_hw(struct tmc_drvdata 
>> *drvdata)
>>         CS_LOCK(drvdata->base);
>>  }
>> 
>> -static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
>> +void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
>>  {
>>         __tmc_etb_disable_hw(drvdata);
>>         coresight_disclaim_device(drvdata->base);
>> @@ -118,7 +118,7 @@ static int tmc_etf_enable_hw(struct tmc_drvdata 
>> *drvdata)
>>         return 0;
>>  }
>> 
>> -static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
>> +void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
>>  {
>>         CS_UNLOCK(drvdata->base);
>> 
>> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c 
>> b/drivers/hwtracing/coresight/coresight-tmc-etr.c
>> index 625882bc8b08..b29c2db94d96 100644
>> --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
>> +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
>> @@ -1110,7 +1110,7 @@ static void __tmc_etr_disable_hw(struct 
>> tmc_drvdata *drvdata)
>> 
>>  }
>> 
>> -static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
>> +void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
>>  {
>>         __tmc_etr_disable_hw(drvdata);
>>         /* Disable CATU device if this ETR is connected to one */
>> diff --git a/drivers/hwtracing/coresight/coresight-tmc.c 
>> b/drivers/hwtracing/coresight/coresight-tmc.c
>> index 5a271ebc4585..7e687a356fe0 100644
>> --- a/drivers/hwtracing/coresight/coresight-tmc.c
>> +++ b/drivers/hwtracing/coresight/coresight-tmc.c
>> @@ -540,6 +540,34 @@ static int tmc_probe(struct amba_device *adev, 
>> const struct amba_id *id)
>>         return ret;
>>  }
>> 
>> +static void tmc_shutdown(struct amba_device *adev)
>> +{
>> +       struct tmc_drvdata *drvdata = amba_get_drvdata(adev);
>> +
> 
> Take drvdata->spinlock here? The tmc_xxx_disable_hw functions are
> normally called with the spinlock claimed.
> 

Sure will take spinlock here.

>> +       if (!drvdata->enable)
> 
> As per previous patch drvdata->mode can be used here.
> 

Yes, will use mode here and drop enable flag in the next version.

Thanks,
Sai
Mathieu Poirier June 1, 2020, 9:28 p.m. UTC | #3
Hi Sai,

On top of the comments already privided by Mike, I have the following:

On Mon, Jun 01, 2020 at 01:32:26PM +0530, Sai Prakash Ranjan wrote:
> Implement a shutdown callback to ensure ETR/ETF hardware is
> properly shutdown in reboot/shutdown path. This is required
> for ETR/ETF which has SMMU address translation enabled like
> on SC7180 SoC and few others. If the hardware is still accessing
> memory after SMMU translation is disabled as part of SMMU
> shutdown callback in system reboot or shutdown path, then
> IOVAs(I/O virtual address) which it was using will go on the bus
> as the physical addresses which might result in unknown crashes
> (NoC/interconnect errors). So we make sure from this shutdown
> callback that the ETR/ETF is shutdown before SMMU translation is
> disabled and device_link in SMMU driver will take care of ordering
> of shutdown callbacks such that SMMU shutdown callback is not
> called before any of its consumer shutdown callbacks.
> 
> Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
> ---
>  .../hwtracing/coresight/coresight-tmc-etf.c   |  4 +--
>  .../hwtracing/coresight/coresight-tmc-etr.c   |  2 +-
>  drivers/hwtracing/coresight/coresight-tmc.c   | 29 +++++++++++++++++++
>  drivers/hwtracing/coresight/coresight-tmc.h   |  3 ++
>  4 files changed, 35 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
> index 36cce2bfb744..cba3e7592820 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
> +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
> @@ -85,7 +85,7 @@ static void __tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
>  	CS_LOCK(drvdata->base);
>  }
>  
> -static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
> +void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
>  {
>  	__tmc_etb_disable_hw(drvdata);
>  	coresight_disclaim_device(drvdata->base);
> @@ -118,7 +118,7 @@ static int tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
>  	return 0;
>  }
>  
> -static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
> +void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
>  {
>  	CS_UNLOCK(drvdata->base);
> 

Why do we care about ETB and ETF when they both use RAM internal to the device?
Moreover, the system RAM they use is not dedicated and as such falls back to the
kernel's memory pool. 
 
> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> index 625882bc8b08..b29c2db94d96 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
> +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> @@ -1110,7 +1110,7 @@ static void __tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
>  
>  }
>  
> -static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
> +void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
>  {
>  	__tmc_etr_disable_hw(drvdata);
>  	/* Disable CATU device if this ETR is connected to one */
> diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c
> index 5a271ebc4585..7e687a356fe0 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc.c
> +++ b/drivers/hwtracing/coresight/coresight-tmc.c
> @@ -540,6 +540,34 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id)
>  	return ret;
>  }
>  
> +static void tmc_shutdown(struct amba_device *adev)
> +{
> +	struct tmc_drvdata *drvdata = amba_get_drvdata(adev);
> +
> +	if (!drvdata->enable)
> +		goto out;
> +
> +	/*
> +	 * We do not care about the active trace sessions here
> +	 * since the system is going down unlike remove callback,
> +	 * just make sure that the hardware is shutdown.
> +	 */
> +	switch (drvdata->config_type) {
> +	case TMC_CONFIG_TYPE_ETB:
> +		tmc_etb_disable_hw(drvdata);
> +		break;
> +	case TMC_CONFIG_TYPE_ETF:
> +		tmc_etf_disable_hw(drvdata);
> +		break;
> +	case TMC_CONFIG_TYPE_ETR:
> +		tmc_etr_disable_hw(drvdata);
> +	}
> +
> +out:
> +	misc_deregister(&drvdata->miscdev);
> +	coresight_unregister(drvdata->csdev);

If a session is active when tmc_shutdown() is called, unregistering the ETF/ETR
will result in a kernel crash if the session is stopped before the kernel has
had the opportunity to shutdown.  It is the problem as trying to make coresight
drivers modular. 

For this to really work the ongoing session would need to be stopped.  That
would teardown the path and stop the sink.

That being said I'm sure that dependencies on an IOMMU isn't a problem confined
to coresight. I am adding Robin Murphy, who added this commit [1], to the thread
in the hope that he can provide guidance on the right way to do this.

Thanks,
Mathieu

[1]. 2ac15068f346 arm64: dts: juno: Add SMMUs device nodes  

> +}
> +
>  static const struct amba_id tmc_ids[] = {
>  	CS_AMBA_ID(0x000bb961),
>  	/* Coresight SoC 600 TMC-ETR/ETS */
> @@ -558,6 +586,7 @@ static struct amba_driver tmc_driver = {
>  		.suppress_bind_attrs = true,
>  	},
>  	.probe		= tmc_probe,
> +	.shutdown	= tmc_shutdown,
>  	.id_table	= tmc_ids,
>  };
>  builtin_amba_driver(tmc_driver);
> diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
> index d156860495c7..f4f56c474e58 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc.h
> +++ b/drivers/hwtracing/coresight/coresight-tmc.h
> @@ -262,6 +262,8 @@ u32 tmc_get_memwidth_mask(struct tmc_drvdata *drvdata);
>  /* ETB/ETF functions */
>  int tmc_read_prepare_etb(struct tmc_drvdata *drvdata);
>  int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata);
> +void tmc_etb_disable_hw(struct tmc_drvdata *drvdata);
> +void tmc_etf_disable_hw(struct tmc_drvdata *drvdata);
>  extern const struct coresight_ops tmc_etb_cs_ops;
>  extern const struct coresight_ops tmc_etf_cs_ops;
>  
> @@ -270,6 +272,7 @@ ssize_t tmc_etb_get_sysfs_trace(struct tmc_drvdata *drvdata,
>  /* ETR functions */
>  int tmc_read_prepare_etr(struct tmc_drvdata *drvdata);
>  int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata);
> +void tmc_etr_disable_hw(struct tmc_drvdata *drvdata);
>  extern const struct coresight_ops tmc_etr_cs_ops;
>  ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata,
>  				loff_t pos, size_t len, char **bufpp);
> -- 
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> of Code Aurora Forum, hosted by The Linux Foundation
>
Sai Prakash Ranjan June 2, 2020, 7:30 a.m. UTC | #4
Hi Mathieu,

Thanks for taking your time for review.

On 2020-06-02 02:58, Mathieu Poirier wrote:
> Hi Sai,
> 
> On top of the comments already privided by Mike, I have the following:
> 
> On Mon, Jun 01, 2020 at 01:32:26PM +0530, Sai Prakash Ranjan wrote:
>> Implement a shutdown callback to ensure ETR/ETF hardware is
>> properly shutdown in reboot/shutdown path. This is required
>> for ETR/ETF which has SMMU address translation enabled like
>> on SC7180 SoC and few others. If the hardware is still accessing
>> memory after SMMU translation is disabled as part of SMMU
>> shutdown callback in system reboot or shutdown path, then
>> IOVAs(I/O virtual address) which it was using will go on the bus
>> as the physical addresses which might result in unknown crashes
>> (NoC/interconnect errors). So we make sure from this shutdown
>> callback that the ETR/ETF is shutdown before SMMU translation is
>> disabled and device_link in SMMU driver will take care of ordering
>> of shutdown callbacks such that SMMU shutdown callback is not
>> called before any of its consumer shutdown callbacks.
>> 
>> Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
>> ---
>>  .../hwtracing/coresight/coresight-tmc-etf.c   |  4 +--
>>  .../hwtracing/coresight/coresight-tmc-etr.c   |  2 +-
>>  drivers/hwtracing/coresight/coresight-tmc.c   | 29 
>> +++++++++++++++++++
>>  drivers/hwtracing/coresight/coresight-tmc.h   |  3 ++
>>  4 files changed, 35 insertions(+), 3 deletions(-)
>> 
>> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c 
>> b/drivers/hwtracing/coresight/coresight-tmc-etf.c
>> index 36cce2bfb744..cba3e7592820 100644
>> --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
>> +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
>> @@ -85,7 +85,7 @@ static void __tmc_etb_disable_hw(struct tmc_drvdata 
>> *drvdata)
>>  	CS_LOCK(drvdata->base);
>>  }
>> 
>> -static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
>> +void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
>>  {
>>  	__tmc_etb_disable_hw(drvdata);
>>  	coresight_disclaim_device(drvdata->base);
>> @@ -118,7 +118,7 @@ static int tmc_etf_enable_hw(struct tmc_drvdata 
>> *drvdata)
>>  	return 0;
>>  }
>> 
>> -static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
>> +void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
>>  {
>>  	CS_UNLOCK(drvdata->base);
>> 
> 
> Why do we care about ETB and ETF when they both use RAM internal to the 
> device?
> Moreover, the system RAM they use is not dedicated and as such falls 
> back to the
> kernel's memory pool.
> 

Actually we don't, I added the disable for ETF/ETB for completeness 
since we are
adding shutdown callback for TMC devices and not just ETR although this 
issue applies
only for ETR and it doesn't hurt to disable these devices in shutdown 
path.

>> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c 
>> b/drivers/hwtracing/coresight/coresight-tmc-etr.c
>> index 625882bc8b08..b29c2db94d96 100644
>> --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
>> +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
>> @@ -1110,7 +1110,7 @@ static void __tmc_etr_disable_hw(struct 
>> tmc_drvdata *drvdata)
>> 
>>  }
>> 
>> -static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
>> +void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
>>  {
>>  	__tmc_etr_disable_hw(drvdata);
>>  	/* Disable CATU device if this ETR is connected to one */
>> diff --git a/drivers/hwtracing/coresight/coresight-tmc.c 
>> b/drivers/hwtracing/coresight/coresight-tmc.c
>> index 5a271ebc4585..7e687a356fe0 100644
>> --- a/drivers/hwtracing/coresight/coresight-tmc.c
>> +++ b/drivers/hwtracing/coresight/coresight-tmc.c
>> @@ -540,6 +540,34 @@ static int tmc_probe(struct amba_device *adev, 
>> const struct amba_id *id)
>>  	return ret;
>>  }
>> 
>> +static void tmc_shutdown(struct amba_device *adev)
>> +{
>> +	struct tmc_drvdata *drvdata = amba_get_drvdata(adev);
>> +
>> +	if (!drvdata->enable)
>> +		goto out;
>> +
>> +	/*
>> +	 * We do not care about the active trace sessions here
>> +	 * since the system is going down unlike remove callback,
>> +	 * just make sure that the hardware is shutdown.
>> +	 */
>> +	switch (drvdata->config_type) {
>> +	case TMC_CONFIG_TYPE_ETB:
>> +		tmc_etb_disable_hw(drvdata);
>> +		break;
>> +	case TMC_CONFIG_TYPE_ETF:
>> +		tmc_etf_disable_hw(drvdata);
>> +		break;
>> +	case TMC_CONFIG_TYPE_ETR:
>> +		tmc_etr_disable_hw(drvdata);
>> +	}
>> +
>> +out:
>> +	misc_deregister(&drvdata->miscdev);
>> +	coresight_unregister(drvdata->csdev);
> 
> If a session is active when tmc_shutdown() is called, unregistering the 
> ETF/ETR
> will result in a kernel crash if the session is stopped before the 
> kernel has
> had the opportunity to shutdown.  It is the problem as trying to make 
> coresight
> drivers modular.
> 
> For this to really work the ongoing session would need to be stopped.  
> That
> would teardown the path and stop the sink.

I have tested this with and without active trace sessions multiple times 
on 2 devices
and did not observe a single crash. The crash should be easily triggered 
as per
what you are saying if we have active sessions but I do not see any 
crash.

> 
> That being said I'm sure that dependencies on an IOMMU isn't a problem 
> confined
> to coresight. I am adding Robin Murphy, who added this commit [1], to 
> the thread
> in the hope that he can provide guidance on the right way to do this.
> 

SMMU/IOMMU won't be able to do much here as it is the client's 
responsiblity to
properly shutdown and SMMU device link just makes sure that 
SMMU(supplier) shutdown is
called only after its consumers shutdown callbacks are called.

Thanks,
Sai
Mike Leach June 2, 2020, 10:12 p.m. UTC | #5
Hi Sai,

On Tue, 2 Jun 2020 at 08:30, Sai Prakash Ranjan
<saiprakash.ranjan@codeaurora.org> wrote:
>
> Hi Mathieu,
>
> Thanks for taking your time for review.
>
> On 2020-06-02 02:58, Mathieu Poirier wrote:
> > Hi Sai,
> >
> > On top of the comments already privided by Mike, I have the following:
> >
> > On Mon, Jun 01, 2020 at 01:32:26PM +0530, Sai Prakash Ranjan wrote:
> >> Implement a shutdown callback to ensure ETR/ETF hardware is
> >> properly shutdown in reboot/shutdown path. This is required
> >> for ETR/ETF which has SMMU address translation enabled like
> >> on SC7180 SoC and few others. If the hardware is still accessing
> >> memory after SMMU translation is disabled as part of SMMU
> >> shutdown callback in system reboot or shutdown path, then
> >> IOVAs(I/O virtual address) which it was using will go on the bus
> >> as the physical addresses which might result in unknown crashes
> >> (NoC/interconnect errors). So we make sure from this shutdown
> >> callback that the ETR/ETF is shutdown before SMMU translation is
> >> disabled and device_link in SMMU driver will take care of ordering
> >> of shutdown callbacks such that SMMU shutdown callback is not
> >> called before any of its consumer shutdown callbacks.
> >>
> >> Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
> >> ---
> >>  .../hwtracing/coresight/coresight-tmc-etf.c   |  4 +--
> >>  .../hwtracing/coresight/coresight-tmc-etr.c   |  2 +-
> >>  drivers/hwtracing/coresight/coresight-tmc.c   | 29
> >> +++++++++++++++++++
> >>  drivers/hwtracing/coresight/coresight-tmc.h   |  3 ++
> >>  4 files changed, 35 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c
> >> b/drivers/hwtracing/coresight/coresight-tmc-etf.c
> >> index 36cce2bfb744..cba3e7592820 100644
> >> --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
> >> +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
> >> @@ -85,7 +85,7 @@ static void __tmc_etb_disable_hw(struct tmc_drvdata
> >> *drvdata)
> >>      CS_LOCK(drvdata->base);
> >>  }
> >>
> >> -static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
> >> +void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
> >>  {
> >>      __tmc_etb_disable_hw(drvdata);
> >>      coresight_disclaim_device(drvdata->base);
> >> @@ -118,7 +118,7 @@ static int tmc_etf_enable_hw(struct tmc_drvdata
> >> *drvdata)
> >>      return 0;
> >>  }
> >>
> >> -static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
> >> +void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
> >>  {
> >>      CS_UNLOCK(drvdata->base);
> >>
> >
> > Why do we care about ETB and ETF when they both use RAM internal to the
> > device?
> > Moreover, the system RAM they use is not dedicated and as such falls
> > back to the
> > kernel's memory pool.
> >
>
> Actually we don't, I added the disable for ETF/ETB for completeness
> since we are
> adding shutdown callback for TMC devices and not just ETR although this
> issue applies
> only for ETR and it doesn't hurt to disable these devices in shutdown
> path.
>

If they don't affect the issue you are fixing, there are good reasons
for leaving ETB./ETF running.
If a system is not completely powered down, then the static ram in
these devices can sometimes be used for post-mortem diagnosis after
re-start.

> >> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c
> >> b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> >> index 625882bc8b08..b29c2db94d96 100644
> >> --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
> >> +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> >> @@ -1110,7 +1110,7 @@ static void __tmc_etr_disable_hw(struct
> >> tmc_drvdata *drvdata)
> >>
> >>  }
> >>
> >> -static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
> >> +void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
> >>  {
> >>      __tmc_etr_disable_hw(drvdata);
> >>      /* Disable CATU device if this ETR is connected to one */
> >> diff --git a/drivers/hwtracing/coresight/coresight-tmc.c
> >> b/drivers/hwtracing/coresight/coresight-tmc.c
> >> index 5a271ebc4585..7e687a356fe0 100644
> >> --- a/drivers/hwtracing/coresight/coresight-tmc.c
> >> +++ b/drivers/hwtracing/coresight/coresight-tmc.c
> >> @@ -540,6 +540,34 @@ static int tmc_probe(struct amba_device *adev,
> >> const struct amba_id *id)
> >>      return ret;
> >>  }
> >>
> >> +static void tmc_shutdown(struct amba_device *adev)
> >> +{
> >> +    struct tmc_drvdata *drvdata = amba_get_drvdata(adev);
> >> +
> >> +    if (!drvdata->enable)
> >> +            goto out;
> >> +
> >> +    /*
> >> +     * We do not care about the active trace sessions here
> >> +     * since the system is going down unlike remove callback,
> >> +     * just make sure that the hardware is shutdown.
> >> +     */
> >> +    switch (drvdata->config_type) {
> >> +    case TMC_CONFIG_TYPE_ETB:
> >> +            tmc_etb_disable_hw(drvdata);
> >> +            break;
> >> +    case TMC_CONFIG_TYPE_ETF:
> >> +            tmc_etf_disable_hw(drvdata);
> >> +            break;
> >> +    case TMC_CONFIG_TYPE_ETR:
> >> +            tmc_etr_disable_hw(drvdata);
> >> +    }
> >> +
> >> +out:
> >> +    misc_deregister(&drvdata->miscdev);
> >> +    coresight_unregister(drvdata->csdev);
> >
> > If a session is active when tmc_shutdown() is called, unregistering the
> > ETF/ETR
> > will result in a kernel crash if the session is stopped before the
> > kernel has
> > had the opportunity to shutdown.  It is the problem as trying to make
> > coresight
> > drivers modular.
> >
> > For this to really work the ongoing session would need to be stopped.
> > That
> > would teardown the path and stop the sink.
>
> I have tested this with and without active trace sessions multiple times
> on 2 devices
> and did not observe a single crash. The crash should be easily triggered
> as per
> what you are saying if we have active sessions but I do not see any
> crash.
>
> >
> > That being said I'm sure that dependencies on an IOMMU isn't a problem
> > confined
> > to coresight. I am adding Robin Murphy, who added this commit [1], to
> > the thread
> > in the hope that he can provide guidance on the right way to do this.
> >
>
> SMMU/IOMMU won't be able to do much here as it is the client's
> responsiblity to
> properly shutdown and SMMU device link just makes sure that
> SMMU(supplier) shutdown is
> called only after its consumers shutdown callbacks are called.

I think this use case can be handled slightly differently than the
general requirements for modular CoreSight drivers.

What is needed here is a way of stopping the underlying ETR hardware
from issuing data to the SMMU, until the entire device has been shut
down, in a way that does not remove the driver, breaking existing
references and causing a system crash.

We could introduce a new mode to the ETR driver - e.g. CS_MODE_SHUTDOWN.

At the end of the block tmc_shutdown(struct amba_device *adev), set
drvdata->mode to CS_MODE_SHUTDOWN & remove the coresight_unregister().
This new mode can be used to  prevent the underlying hardware from
being able to restart until the device is re-powered.

This mode can be detected in the code that enables / disables the ETR
and handled appropriately (updates to tmc_enable_etr_sink and
tmc_disable_etr_sink).
This mode will persist until the device is re-started - but because we
are on the device shutdown path this is not an issue.

This should leave the CoreSight infrastructure stable until the
drivers are shut down normally as part of the device power down
process.

Regards

Mike



>
> Thanks,
> Sai
>
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
> member
> of Code Aurora Forum, hosted by The Linux Foundation



--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
Sai Prakash Ranjan June 3, 2020, 10:24 a.m. UTC | #6
Hi Mike,

Thanks again for looking at this.

On 2020-06-03 03:42, Mike Leach wrote:
[...]

>> 
>> SMMU/IOMMU won't be able to do much here as it is the client's
>> responsiblity to
>> properly shutdown and SMMU device link just makes sure that
>> SMMU(supplier) shutdown is
>> called only after its consumers shutdown callbacks are called.
> 
> I think this use case can be handled slightly differently than the
> general requirements for modular CoreSight drivers.
> 
> What is needed here is a way of stopping the underlying ETR hardware
> from issuing data to the SMMU, until the entire device has been shut
> down, in a way that does not remove the driver, breaking existing
> references and causing a system crash.
> 
> We could introduce a new mode to the ETR driver - e.g. 
> CS_MODE_SHUTDOWN.
> 
> At the end of the block tmc_shutdown(struct amba_device *adev), set
> drvdata->mode to CS_MODE_SHUTDOWN & remove the coresight_unregister().
> This new mode can be used to  prevent the underlying hardware from
> being able to restart until the device is re-powered.
> 
> This mode can be detected in the code that enables / disables the ETR
> and handled appropriately (updates to tmc_enable_etr_sink and
> tmc_disable_etr_sink).
> This mode will persist until the device is re-started - but because we
> are on the device shutdown path this is not an issue.
> 
> This should leave the CoreSight infrastructure stable until the
> drivers are shut down normally as part of the device power down
> process.
> 

Sounds good to me, but if the coresight_unregister() is the trouble 
point
causing these crashes, then can't we just remove that from 
tmc_shutdown()
callback? This would be like maintaining the same behaviour as now where
on reboot/shutdown we basically don't do anything except for disabling 
ETR.
This way, we do not have to introduce any new mode as well. To be exact, 
in
tmc_shutdown() we just disable ETR and then return without unregistering
which should not cause any issues since this is shutdown not the remove
callback which is a requirement for making coresight modular like below:

static void tmc_shutdown(struct amba_device *adev)
  {
          unsigned long flags;
          struct tmc_drvdata *drvdata = amba_get_drvdata(adev);

          spin_lock_irqsave(&drvdata->spinlock, flags);

          if (drvdata->mode == CS_MODE_DISABLED)
                  goto out;

          if (drvdata->config_type == TMC_CONFIG_TYPE_ETR)
                  tmc_etr_disable_hw(drvdata);

          /*
           * We do not care about coresight unregister here unlike remove
           * callback which is required for making coresight modular 
since
           * the system is going down after this.
           */
  out:
          spin_unlock_irqrestore(&drvdata->spinlock, flags);
  }


Thanks,
Sai
Mike Leach June 3, 2020, 11:27 a.m. UTC | #7
Hi,

On Wed, 3 Jun 2020 at 11:24, Sai Prakash Ranjan
<saiprakash.ranjan@codeaurora.org> wrote:
>
> Hi Mike,
>
> Thanks again for looking at this.
>
> On 2020-06-03 03:42, Mike Leach wrote:
> [...]
>
> >>
> >> SMMU/IOMMU won't be able to do much here as it is the client's
> >> responsiblity to
> >> properly shutdown and SMMU device link just makes sure that
> >> SMMU(supplier) shutdown is
> >> called only after its consumers shutdown callbacks are called.
> >
> > I think this use case can be handled slightly differently than the
> > general requirements for modular CoreSight drivers.
> >
> > What is needed here is a way of stopping the underlying ETR hardware
> > from issuing data to the SMMU, until the entire device has been shut
> > down, in a way that does not remove the driver, breaking existing
> > references and causing a system crash.
> >
> > We could introduce a new mode to the ETR driver - e.g.
> > CS_MODE_SHUTDOWN.
> >
> > At the end of the block tmc_shutdown(struct amba_device *adev), set
> > drvdata->mode to CS_MODE_SHUTDOWN & remove the coresight_unregister().
> > This new mode can be used to  prevent the underlying hardware from
> > being able to restart until the device is re-powered.
> >
> > This mode can be detected in the code that enables / disables the ETR
> > and handled appropriately (updates to tmc_enable_etr_sink and
> > tmc_disable_etr_sink).
> > This mode will persist until the device is re-started - but because we
> > are on the device shutdown path this is not an issue.
> >
> > This should leave the CoreSight infrastructure stable until the
> > drivers are shut down normally as part of the device power down
> > process.
> >
>
> Sounds good to me, but if the coresight_unregister() is the trouble
> point
> causing these crashes, then can't we just remove that from
> tmc_shutdown()
> callback? This would be like maintaining the same behaviour as now where
> on reboot/shutdown we basically don't do anything except for disabling
> ETR.

No - the new mode prevents race conditions where the thread shutting
down the SMMU does the ETR shutdown, but then another thread happens
to be trying to start trace and restarts the ETR.
It also prevents the condition Mathieu discussed where a thread might
be attempting to shutdown trace - this could try to disable the
hardware again re-releasing resources/ re-flushing and waiting for
stop.

Regards

Mike

> This way, we do not have to introduce any new mode as well. To be exact,
> in
> tmc_shutdown() we just disable ETR and then return without unregistering
> which should not cause any issues since this is shutdown not the remove
> callback which is a requirement for making coresight modular like below:
>
> static void tmc_shutdown(struct amba_device *adev)
>   {
>           unsigned long flags;
>           struct tmc_drvdata *drvdata = amba_get_drvdata(adev);
>
>           spin_lock_irqsave(&drvdata->spinlock, flags);
>
>           if (drvdata->mode == CS_MODE_DISABLED)
>                   goto out;
>
>           if (drvdata->config_type == TMC_CONFIG_TYPE_ETR)
>                   tmc_etr_disable_hw(drvdata);
>
>           /*
>            * We do not care about coresight unregister here unlike remove
>            * callback which is required for making coresight modular
> since
>            * the system is going down after this.
>            */
>   out:
>           spin_unlock_irqrestore(&drvdata->spinlock, flags);
>   } from disabling the ETR again - potentially freeing up memory twice.
>
>
> Thanks,
> Sai
>
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
> member
> of Code Aurora Forum, hosted by The Linux Foundation
Robin Murphy June 3, 2020, 11:37 a.m. UTC | #8
On 2020-06-01 22:28, Mathieu Poirier wrote:
> That being said I'm sure that dependencies on an IOMMU isn't a problem confined
> to coresight. I am adding Robin Murphy, who added this commit [1], to the thread
> in the hope that he can provide guidance on the right way to do this.

Right, it's not specific to CoreSight, and it's not even specific to 
IOMMUs really. In short, blame kexec ;)

The fundamental thing is that devices should stop any DMA activity at 
shutdown. For a normal poweroff you can typically get away without doing 
so, but over kexec, ongoing DMA traffic may corrupt memory in the new 
kernel (at worst, I think even DMA reads could potentially cause 
unexpected cache behaviour that might lead to mishaps, given the right 
combination of memory attributes).

IOMMUs merely help to make the situation more serious. For similar kexec 
reasons, they need to disable any existing translations at shutdown 
(imagine if the second kernel didn't have an IOMMU driver). And at that 
point, even the normal poweroff case becomes problematic, because any 
device DMA that hasn't been shut down beforehand is now not necessarily 
going benignly to memory as it would in the no-IOMMU case above, but 
potentially to random physical addresses, with all the hilarity ensuing 
that you would expect from that.

Robin.
Sai Prakash Ranjan June 3, 2020, noon UTC | #9
Hi Robin, Mathieu

On 2020-06-03 17:07, Robin Murphy wrote:
> On 2020-06-01 22:28, Mathieu Poirier wrote:
>> That being said I'm sure that dependencies on an IOMMU isn't a problem 
>> confined
>> to coresight. I am adding Robin Murphy, who added this commit [1], to 
>> the thread
>> in the hope that he can provide guidance on the right way to do this.
> 
> Right, it's not specific to CoreSight, and it's not even specific to
> IOMMUs really. In short, blame kexec ;)
> 

Yes it is not specific to coresight, we are targeting this for all
consumers/clients of SMMU(atleast on SC7180 SoC). We have display 
throwing
NoC/interconnect errors[1] during reboot after SMMU is disabled.
This is also not specific to kexec either as you explained here [2] 
about
a case with display which is exacly what is happening in our system [1].

[1] 
https://lore.kernel.org/lkml/1591009402-681-1-git-send-email-mkrishn@codeaurora.org/
[2] 
https://lore.kernel.org/lkml/5858bdac-b7f9-ac26-0c0d-c9653cef841d@arm.com/

> The fundamental thing is that devices should stop any DMA activity at
> shutdown. For a normal poweroff you can typically get away without
> doing so, but over kexec, ongoing DMA traffic may corrupt memory in
> the new kernel (at worst, I think even DMA reads could potentially
> cause unexpected cache behaviour that might lead to mishaps, given the
> right combination of memory attributes).
> 
> IOMMUs merely help to make the situation more serious. For similar
> kexec reasons, they need to disable any existing translations at
> shutdown (imagine if the second kernel didn't have an IOMMU driver).
> And at that point, even the normal poweroff case becomes problematic,
> because any device DMA that hasn't been shut down beforehand is now
> not necessarily going benignly to memory as it would in the no-IOMMU
> case above, but potentially to random physical addresses, with all the
> hilarity ensuing that you would expect from that.
> 

Thanks,
Sai
Sai Prakash Ranjan June 3, 2020, 12:14 p.m. UTC | #10
Hi Mike,

On 2020-06-03 16:57, Mike Leach wrote:
> Hi,
> 
> On Wed, 3 Jun 2020 at 11:24, Sai Prakash Ranjan
> <saiprakash.ranjan@codeaurora.org> wrote:
>> 
>> Hi Mike,
>> 
>> Thanks again for looking at this.
>> 
>> On 2020-06-03 03:42, Mike Leach wrote:
>> [...]
>> 
>> >>
>> >> SMMU/IOMMU won't be able to do much here as it is the client's
>> >> responsiblity to
>> >> properly shutdown and SMMU device link just makes sure that
>> >> SMMU(supplier) shutdown is
>> >> called only after its consumers shutdown callbacks are called.
>> >
>> > I think this use case can be handled slightly differently than the
>> > general requirements for modular CoreSight drivers.
>> >
>> > What is needed here is a way of stopping the underlying ETR hardware
>> > from issuing data to the SMMU, until the entire device has been shut
>> > down, in a way that does not remove the driver, breaking existing
>> > references and causing a system crash.
>> >
>> > We could introduce a new mode to the ETR driver - e.g.
>> > CS_MODE_SHUTDOWN.
>> >
>> > At the end of the block tmc_shutdown(struct amba_device *adev), set
>> > drvdata->mode to CS_MODE_SHUTDOWN & remove the coresight_unregister().
>> > This new mode can be used to  prevent the underlying hardware from
>> > being able to restart until the device is re-powered.
>> >
>> > This mode can be detected in the code that enables / disables the ETR
>> > and handled appropriately (updates to tmc_enable_etr_sink and
>> > tmc_disable_etr_sink).
>> > This mode will persist until the device is re-started - but because we
>> > are on the device shutdown path this is not an issue.
>> >
>> > This should leave the CoreSight infrastructure stable until the
>> > drivers are shut down normally as part of the device power down
>> > process.
>> >
>> 
>> Sounds good to me, but if the coresight_unregister() is the trouble
>> point
>> causing these crashes, then can't we just remove that from
>> tmc_shutdown()
>> callback? This would be like maintaining the same behaviour as now 
>> where
>> on reboot/shutdown we basically don't do anything except for disabling
>> ETR.
> 
> No - the new mode prevents race conditions where the thread shutting
> down the SMMU does the ETR shutdown, but then another thread happens
> to be trying to start trace and restarts the ETR.
> It also prevents the condition Mathieu discussed where a thread might
> be attempting to shutdown trace - this could try to disable the
> hardware again re-releasing resources/ re-flushing and waiting for
> stop.
> 

I do not think there will a race between SMMU shutdown and ETR shutdown.
Driver core takes care of calling SMMU shutdown after its consumer
shutdown callbacks via device link, otherwise there would already be
bugs in all other client drivers.

Thanks,
Sai
Robin Murphy June 3, 2020, 12:21 p.m. UTC | #11
On 2020-06-03 13:00, Sai Prakash Ranjan wrote:
> Hi Robin, Mathieu
> 
> On 2020-06-03 17:07, Robin Murphy wrote:
>> On 2020-06-01 22:28, Mathieu Poirier wrote:
>>> That being said I'm sure that dependencies on an IOMMU isn't a 
>>> problem confined
>>> to coresight. I am adding Robin Murphy, who added this commit [1], to 
>>> the thread
>>> in the hope that he can provide guidance on the right way to do this.
>>
>> Right, it's not specific to CoreSight, and it's not even specific to
>> IOMMUs really. In short, blame kexec ;)
>>
> 
> Yes it is not specific to coresight, we are targeting this for all
> consumers/clients of SMMU(atleast on SC7180 SoC). We have display throwing
> NoC/interconnect errors[1] during reboot after SMMU is disabled.
> This is also not specific to kexec either as you explained here [2] about
> a case with display which is exacly what is happening in our system [1].

Sure, but those instances are begging the question of why the SMMU is 
disabled at reboot in the first place ;)

Robin.

> 
> [1] 
> https://lore.kernel.org/lkml/1591009402-681-1-git-send-email-mkrishn@codeaurora.org/ 
> 
> [2] 
> https://lore.kernel.org/lkml/5858bdac-b7f9-ac26-0c0d-c9653cef841d@arm.com/
> 
>> The fundamental thing is that devices should stop any DMA activity at
>> shutdown. For a normal poweroff you can typically get away without
>> doing so, but over kexec, ongoing DMA traffic may corrupt memory in
>> the new kernel (at worst, I think even DMA reads could potentially
>> cause unexpected cache behaviour that might lead to mishaps, given the
>> right combination of memory attributes).
>>
>> IOMMUs merely help to make the situation more serious. For similar
>> kexec reasons, they need to disable any existing translations at
>> shutdown (imagine if the second kernel didn't have an IOMMU driver).
>> And at that point, even the normal poweroff case becomes problematic,
>> because any device DMA that hasn't been shut down beforehand is now
>> not necessarily going benignly to memory as it would in the no-IOMMU
>> case above, but potentially to random physical addresses, with all the
>> hilarity ensuing that you would expect from that.
>>
> 
> Thanks,
> Sai
Sai Prakash Ranjan June 3, 2020, 12:26 p.m. UTC | #12
Hi Robin,

On 2020-06-03 17:51, Robin Murphy wrote:
> On 2020-06-03 13:00, Sai Prakash Ranjan wrote:
>> Hi Robin, Mathieu
>> 
>> On 2020-06-03 17:07, Robin Murphy wrote:
>>> On 2020-06-01 22:28, Mathieu Poirier wrote:
>>>> That being said I'm sure that dependencies on an IOMMU isn't a 
>>>> problem confined
>>>> to coresight. I am adding Robin Murphy, who added this commit [1], 
>>>> to the thread
>>>> in the hope that he can provide guidance on the right way to do 
>>>> this.
>>> 
>>> Right, it's not specific to CoreSight, and it's not even specific to
>>> IOMMUs really. In short, blame kexec ;)
>>> 
>> 
>> Yes it is not specific to coresight, we are targeting this for all
>> consumers/clients of SMMU(atleast on SC7180 SoC). We have display 
>> throwing
>> NoC/interconnect errors[1] during reboot after SMMU is disabled.
>> This is also not specific to kexec either as you explained here [2] 
>> about
>> a case with display which is exacly what is happening in our system 
>> [1].
> 
> Sure, but those instances are begging the question of why the SMMU is
> disabled at reboot in the first place ;)
> 

That is what happens in SMMU shutdown callback right? It is the 
reboot/shutdown
flow.

    arm_smmu_device_shutdown()
     platform_drv_shutdown()
      device_shutdown()
       kernel_restart_prepare()
        kernel_restart()

Thanks,
Sai
Mike Leach June 3, 2020, 1:22 p.m. UTC | #13
Hi Sai,

On Wed, 3 Jun 2020 at 13:14, Sai Prakash Ranjan
<saiprakash.ranjan@codeaurora.org> wrote:
>
> Hi Mike,
>
> On 2020-06-03 16:57, Mike Leach wrote:
> > Hi,
> >
> > On Wed, 3 Jun 2020 at 11:24, Sai Prakash Ranjan
> > <saiprakash.ranjan@codeaurora.org> wrote:
> >>
> >> Hi Mike,
> >>
> >> Thanks again for looking at this.
> >>
> >> On 2020-06-03 03:42, Mike Leach wrote:
> >> [...]
> >>
> >> >>
> >> >> SMMU/IOMMU won't be able to do much here as it is the client's
> >> >> responsiblity to
> >> >> properly shutdown and SMMU device link just makes sure that
> >> >> SMMU(supplier) shutdown is
> >> >> called only after its consumers shutdown callbacks are called.
> >> >
> >> > I think this use case can be handled slightly differently than the
> >> > general requirements for modular CoreSight drivers.
> >> >
> >> > What is needed here is a way of stopping the underlying ETR hardware
> >> > from issuing data to the SMMU, until the entire device has been shut
> >> > down, in a way that does not remove the driver, breaking existing
> >> > references and causing a system crash.
> >> >
> >> > We could introduce a new mode to the ETR driver - e.g.
> >> > CS_MODE_SHUTDOWN.
> >> >
> >> > At the end of the block tmc_shutdown(struct amba_device *adev), set
> >> > drvdata->mode to CS_MODE_SHUTDOWN & remove the coresight_unregister().
> >> > This new mode can be used to  prevent the underlying hardware from
> >> > being able to restart until the device is re-powered.
> >> >
> >> > This mode can be detected in the code that enables / disables the ETR
> >> > and handled appropriately (updates to tmc_enable_etr_sink and
> >> > tmc_disable_etr_sink).
> >> > This mode will persist until the device is re-started - but because we
> >> > are on the device shutdown path this is not an issue.
> >> >
> >> > This should leave the CoreSight infrastructure stable until the
> >> > drivers are shut down normally as part of the device power down
> >> > process.
> >> >
> >>
> >> Sounds good to me, but if the coresight_unregister() is the trouble
> >> point
> >> causing these crashes, then can't we just remove that from
> >> tmc_shutdown()
> >> callback? This would be like maintaining the same behaviour as now
> >> where
> >> on reboot/shutdown we basically don't do anything except for disabling
> >> ETR.
> >
> > No - the new mode prevents race conditions where the thread shutting
> > down the SMMU does the ETR shutdown, but then another thread happens
> > to be trying to start trace and restarts the ETR.
> > It also prevents the condition Mathieu discussed where a thread might
> > be attempting to shutdown trace - this could try to disable the
> > hardware again re-releasing resources/ re-flushing and waiting for
> > stop.
> >
>
> I do not think there will a race between SMMU shutdown and ETR shutdown.
> Driver core takes care of calling SMMU shutdown after its consumer
> shutdown callbacks via device link, otherwise there would already be
> bugs in all other client drivers.
>

I am not saying there could be a race between tmc_shutdowm and
Smmu_shutdown - there may be a case if the coresight_disable_path
sequence is running and gets to the point of disabling the ETR after
the SMMU callback has disabled it.

Mike

> Thanks,
> Sai
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
> member
> of Code Aurora Forum, hosted by The Linux Foundation
Robin Murphy June 3, 2020, 1:34 p.m. UTC | #14
On 2020-06-03 14:22, Mike Leach wrote:
> Hi Sai,
> 
> On Wed, 3 Jun 2020 at 13:14, Sai Prakash Ranjan
> <saiprakash.ranjan@codeaurora.org> wrote:
>>
>> Hi Mike,
>>
>> On 2020-06-03 16:57, Mike Leach wrote:
>>> Hi,
>>>
>>> On Wed, 3 Jun 2020 at 11:24, Sai Prakash Ranjan
>>> <saiprakash.ranjan@codeaurora.org> wrote:
>>>>
>>>> Hi Mike,
>>>>
>>>> Thanks again for looking at this.
>>>>
>>>> On 2020-06-03 03:42, Mike Leach wrote:
>>>> [...]
>>>>
>>>>>>
>>>>>> SMMU/IOMMU won't be able to do much here as it is the client's
>>>>>> responsiblity to
>>>>>> properly shutdown and SMMU device link just makes sure that
>>>>>> SMMU(supplier) shutdown is
>>>>>> called only after its consumers shutdown callbacks are called.
>>>>>
>>>>> I think this use case can be handled slightly differently than the
>>>>> general requirements for modular CoreSight drivers.
>>>>>
>>>>> What is needed here is a way of stopping the underlying ETR hardware
>>>>> from issuing data to the SMMU, until the entire device has been shut
>>>>> down, in a way that does not remove the driver, breaking existing
>>>>> references and causing a system crash.
>>>>>
>>>>> We could introduce a new mode to the ETR driver - e.g.
>>>>> CS_MODE_SHUTDOWN.
>>>>>
>>>>> At the end of the block tmc_shutdown(struct amba_device *adev), set
>>>>> drvdata->mode to CS_MODE_SHUTDOWN & remove the coresight_unregister().
>>>>> This new mode can be used to  prevent the underlying hardware from
>>>>> being able to restart until the device is re-powered.
>>>>>
>>>>> This mode can be detected in the code that enables / disables the ETR
>>>>> and handled appropriately (updates to tmc_enable_etr_sink and
>>>>> tmc_disable_etr_sink).
>>>>> This mode will persist until the device is re-started - but because we
>>>>> are on the device shutdown path this is not an issue.
>>>>>
>>>>> This should leave the CoreSight infrastructure stable until the
>>>>> drivers are shut down normally as part of the device power down
>>>>> process.
>>>>>
>>>>
>>>> Sounds good to me, but if the coresight_unregister() is the trouble
>>>> point
>>>> causing these crashes, then can't we just remove that from
>>>> tmc_shutdown()
>>>> callback? This would be like maintaining the same behaviour as now
>>>> where
>>>> on reboot/shutdown we basically don't do anything except for disabling
>>>> ETR.
>>>
>>> No - the new mode prevents race conditions where the thread shutting
>>> down the SMMU does the ETR shutdown, but then another thread happens
>>> to be trying to start trace and restarts the ETR.
>>> It also prevents the condition Mathieu discussed where a thread might
>>> be attempting to shutdown trace - this could try to disable the
>>> hardware again re-releasing resources/ re-flushing and waiting for
>>> stop.
>>>
>>
>> I do not think there will a race between SMMU shutdown and ETR shutdown.
>> Driver core takes care of calling SMMU shutdown after its consumer
>> shutdown callbacks via device link, otherwise there would already be
>> bugs in all other client drivers.
>>
> 
> I am not saying there could be a race between tmc_shutdowm and
> Smmu_shutdown - there may be a case if the coresight_disable_path
> sequence is running and gets to the point of disabling the ETR after
> the SMMU callback has disabled it.

I'm confused now - there is no "SMMU callback", we're talking about the 
system-wide cleanup from kernel_shutdown_prepare() or 
kernel_restart_prepare(). As far as I'm aware userspace should be long 
gone by that point, so although trace may have been left running, the 
chance of racing against other driver operations seems pretty unlikely.

Robin.
Robin Murphy June 3, 2020, 1:40 p.m. UTC | #15
On 2020-06-03 13:26, Sai Prakash Ranjan wrote:
> Hi Robin,
> 
> On 2020-06-03 17:51, Robin Murphy wrote:
>> On 2020-06-03 13:00, Sai Prakash Ranjan wrote:
>>> Hi Robin, Mathieu
>>>
>>> On 2020-06-03 17:07, Robin Murphy wrote:
>>>> On 2020-06-01 22:28, Mathieu Poirier wrote:
>>>>> That being said I'm sure that dependencies on an IOMMU isn't a 
>>>>> problem confined
>>>>> to coresight. I am adding Robin Murphy, who added this commit [1], 
>>>>> to the thread
>>>>> in the hope that he can provide guidance on the right way to do this.
>>>>
>>>> Right, it's not specific to CoreSight, and it's not even specific to
>>>> IOMMUs really. In short, blame kexec ;)
>>>>
>>>
>>> Yes it is not specific to coresight, we are targeting this for all
>>> consumers/clients of SMMU(atleast on SC7180 SoC). We have display 
>>> throwing
>>> NoC/interconnect errors[1] during reboot after SMMU is disabled.
>>> This is also not specific to kexec either as you explained here [2] 
>>> about
>>> a case with display which is exacly what is happening in our system [1].
>>
>> Sure, but those instances are begging the question of why the SMMU is
>> disabled at reboot in the first place ;)
>>
> 
> That is what happens in SMMU shutdown callback right? It is the 
> reboot/shutdown flow.

Yes, that's where it happens, but my point is *why* it happens at all.

hint: `git log --grep=shutdown drivers/iommu/`

If we could assume the system is always about to be powered off or 
reset, we wouldn't need to do anything to the SMMU either ;)

Robin.

> 
>     arm_smmu_device_shutdown()
>      platform_drv_shutdown()
>       device_shutdown()
>        kernel_restart_prepare()
>         kernel_restart()
> 
> Thanks,
> Sai
>
Sai Prakash Ranjan June 3, 2020, 1:43 p.m. UTC | #16
Hi Mike,

On 2020-06-03 19:04, Robin Murphy wrote:
> On 2020-06-03 14:22, Mike Leach wrote:
>> Hi Sai,
>> 
>> On Wed, 3 Jun 2020 at 13:14, Sai Prakash Ranjan
>> <saiprakash.ranjan@codeaurora.org> wrote:
>>> 
>>> Hi Mike,
>>> 
>>> On 2020-06-03 16:57, Mike Leach wrote:
>>>> Hi,
>>>> 
>>>> On Wed, 3 Jun 2020 at 11:24, Sai Prakash Ranjan
>>>> <saiprakash.ranjan@codeaurora.org> wrote:
>>>>> 
>>>>> Hi Mike,
>>>>> 
>>>>> Thanks again for looking at this.
>>>>> 
>>>>> On 2020-06-03 03:42, Mike Leach wrote:
>>>>> [...]
>>>>> 
>>>>>>> 
>>>>>>> SMMU/IOMMU won't be able to do much here as it is the client's
>>>>>>> responsiblity to
>>>>>>> properly shutdown and SMMU device link just makes sure that
>>>>>>> SMMU(supplier) shutdown is
>>>>>>> called only after its consumers shutdown callbacks are called.
>>>>>> 
>>>>>> I think this use case can be handled slightly differently than the
>>>>>> general requirements for modular CoreSight drivers.
>>>>>> 
>>>>>> What is needed here is a way of stopping the underlying ETR 
>>>>>> hardware
>>>>>> from issuing data to the SMMU, until the entire device has been 
>>>>>> shut
>>>>>> down, in a way that does not remove the driver, breaking existing
>>>>>> references and causing a system crash.
>>>>>> 
>>>>>> We could introduce a new mode to the ETR driver - e.g.
>>>>>> CS_MODE_SHUTDOWN.
>>>>>> 
>>>>>> At the end of the block tmc_shutdown(struct amba_device *adev), 
>>>>>> set
>>>>>> drvdata->mode to CS_MODE_SHUTDOWN & remove the 
>>>>>> coresight_unregister().
>>>>>> This new mode can be used to  prevent the underlying hardware from
>>>>>> being able to restart until the device is re-powered.
>>>>>> 
>>>>>> This mode can be detected in the code that enables / disables the 
>>>>>> ETR
>>>>>> and handled appropriately (updates to tmc_enable_etr_sink and
>>>>>> tmc_disable_etr_sink).
>>>>>> This mode will persist until the device is re-started - but 
>>>>>> because we
>>>>>> are on the device shutdown path this is not an issue.
>>>>>> 
>>>>>> This should leave the CoreSight infrastructure stable until the
>>>>>> drivers are shut down normally as part of the device power down
>>>>>> process.
>>>>>> 
>>>>> 
>>>>> Sounds good to me, but if the coresight_unregister() is the trouble
>>>>> point
>>>>> causing these crashes, then can't we just remove that from
>>>>> tmc_shutdown()
>>>>> callback? This would be like maintaining the same behaviour as now
>>>>> where
>>>>> on reboot/shutdown we basically don't do anything except for 
>>>>> disabling
>>>>> ETR.
>>>> 
>>>> No - the new mode prevents race conditions where the thread shutting
>>>> down the SMMU does the ETR shutdown, but then another thread happens
>>>> to be trying to start trace and restarts the ETR.
>>>> It also prevents the condition Mathieu discussed where a thread 
>>>> might
>>>> be attempting to shutdown trace - this could try to disable the
>>>> hardware again re-releasing resources/ re-flushing and waiting for
>>>> stop.
>>>> 
>>> 
>>> I do not think there will a race between SMMU shutdown and ETR 
>>> shutdown.
>>> Driver core takes care of calling SMMU shutdown after its consumer
>>> shutdown callbacks via device link, otherwise there would already be
>>> bugs in all other client drivers.
>>> 
>> 
>> I am not saying there could be a race between tmc_shutdowm and
>> Smmu_shutdown - there may be a case if the coresight_disable_path
>> sequence is running and gets to the point of disabling the ETR after
>> the SMMU callback has disabled it.
> 
> I'm confused now - there is no "SMMU callback", we're talking about
> the system-wide cleanup from kernel_shutdown_prepare() or
> kernel_restart_prepare(). As far as I'm aware userspace should be long
> gone by that point, so although trace may have been left running, the
> chance of racing against other driver operations seems pretty
> unlikely.
> 

As Robin said, it is not SMMU callback but the normal reboot/shutdown
flow and race is unlikely at that point.

    tmc_shutdown()
     platform_drv_shutdown()
       device_shutdown()
        kernel_restart_prepare()
         kernel_restart()

If I am not clear enough, first all the consumer shutdown callbacks of 
SMMU
are called like above tmc_shutdown() and then we call the 
arm_smmu_device_shutdown(),
this ordering is ensured by the device links.

Thanks,
Sai
Sai Prakash Ranjan June 3, 2020, 1:51 p.m. UTC | #17
Hi Robin,

On 2020-06-03 19:10, Robin Murphy wrote:
> On 2020-06-03 13:26, Sai Prakash Ranjan wrote:
>> Hi Robin,
>> 
>> On 2020-06-03 17:51, Robin Murphy wrote:
>>> On 2020-06-03 13:00, Sai Prakash Ranjan wrote:
>>>> Hi Robin, Mathieu
>>>> 
>>>> On 2020-06-03 17:07, Robin Murphy wrote:
>>>>> On 2020-06-01 22:28, Mathieu Poirier wrote:
>>>>>> That being said I'm sure that dependencies on an IOMMU isn't a 
>>>>>> problem confined
>>>>>> to coresight. I am adding Robin Murphy, who added this commit [1], 
>>>>>> to the thread
>>>>>> in the hope that he can provide guidance on the right way to do 
>>>>>> this.
>>>>> 
>>>>> Right, it's not specific to CoreSight, and it's not even specific 
>>>>> to
>>>>> IOMMUs really. In short, blame kexec ;)
>>>>> 
>>>> 
>>>> Yes it is not specific to coresight, we are targeting this for all
>>>> consumers/clients of SMMU(atleast on SC7180 SoC). We have display 
>>>> throwing
>>>> NoC/interconnect errors[1] during reboot after SMMU is disabled.
>>>> This is also not specific to kexec either as you explained here [2] 
>>>> about
>>>> a case with display which is exacly what is happening in our system 
>>>> [1].
>>> 
>>> Sure, but those instances are begging the question of why the SMMU is
>>> disabled at reboot in the first place ;)
>>> 
>> 
>> That is what happens in SMMU shutdown callback right? It is the 
>> reboot/shutdown flow.
> 
> Yes, that's where it happens, but my point is *why* it happens at all.
> 
> hint: `git log --grep=shutdown drivers/iommu/`
> 

Ah my change :)

> If we could assume the system is always about to be powered off or
> reset, we wouldn't need to do anything to the SMMU either ;)
> 

Are you hinting at removing SMMU shutdown callback altogether ;)

Thanks,
Sai
Mike Leach June 3, 2020, 1:51 p.m. UTC | #18
Hi,


On Wed, 3 Jun 2020 at 14:34, Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 2020-06-03 14:22, Mike Leach wrote:
> > Hi Sai,
> >
> > On Wed, 3 Jun 2020 at 13:14, Sai Prakash Ranjan
> > <saiprakash.ranjan@codeaurora.org> wrote:
> >>
> >> Hi Mike,
> >>
> >> On 2020-06-03 16:57, Mike Leach wrote:
> >>> Hi,
> >>>
> >>> On Wed, 3 Jun 2020 at 11:24, Sai Prakash Ranjan
> >>> <saiprakash.ranjan@codeaurora.org> wrote:
> >>>>
> >>>> Hi Mike,
> >>>>
> >>>> Thanks again for looking at this.
> >>>>
> >>>> On 2020-06-03 03:42, Mike Leach wrote:
> >>>> [...]
> >>>>
> >>>>>>
> >>>>>> SMMU/IOMMU won't be able to do much here as it is the client's
> >>>>>> responsiblity to
> >>>>>> properly shutdown and SMMU device link just makes sure that
> >>>>>> SMMU(supplier) shutdown is
> >>>>>> called only after its consumers shutdown callbacks are called.
> >>>>>
> >>>>> I think this use case can be handled slightly differently than the
> >>>>> general requirements for modular CoreSight drivers.
> >>>>>
> >>>>> What is needed here is a way of stopping the underlying ETR hardware
> >>>>> from issuing data to the SMMU, until the entire device has been shut
> >>>>> down, in a way that does not remove the driver, breaking existing
> >>>>> references and causing a system crash.
> >>>>>
> >>>>> We could introduce a new mode to the ETR driver - e.g.
> >>>>> CS_MODE_SHUTDOWN.
> >>>>>
> >>>>> At the end of the block tmc_shutdown(struct amba_device *adev), set
> >>>>> drvdata->mode to CS_MODE_SHUTDOWN & remove the coresight_unregister().
> >>>>> This new mode can be used to  prevent the underlying hardware from
> >>>>> being able to restart until the device is re-powered.
> >>>>>
> >>>>> This mode can be detected in the code that enables / disables the ETR
> >>>>> and handled appropriately (updates to tmc_enable_etr_sink and
> >>>>> tmc_disable_etr_sink).
> >>>>> This mode will persist until the device is re-started - but because we
> >>>>> are on the device shutdown path this is not an issue.
> >>>>>
> >>>>> This should leave the CoreSight infrastructure stable until the
> >>>>> drivers are shut down normally as part of the device power down
> >>>>> process.
> >>>>>
> >>>>
> >>>> Sounds good to me, but if the coresight_unregister() is the trouble
> >>>> point
> >>>> causing these crashes, then can't we just remove that from
> >>>> tmc_shutdown()
> >>>> callback? This would be like maintaining the same behaviour as now
> >>>> where
> >>>> on reboot/shutdown we basically don't do anything except for disabling
> >>>> ETR.
> >>>
> >>> No - the new mode prevents race conditions where the thread shutting
> >>> down the SMMU does the ETR shutdown, but then another thread happens
> >>> to be trying to start trace and restarts the ETR.
> >>> It also prevents the condition Mathieu discussed where a thread might
> >>> be attempting to shutdown trace - this could try to disable the
> >>> hardware again re-releasing resources/ re-flushing and waiting for
> >>> stop.
> >>>
> >>
> >> I do not think there will a race between SMMU shutdown and ETR shutdown.
> >> Driver core takes care of calling SMMU shutdown after its consumer
> >> shutdown callbacks via device link, otherwise there would already be
> >> bugs in all other client drivers.
> >>
> >
> > I am not saying there could be a race between tmc_shutdowm and
> > Smmu_shutdown - there may be a case if the coresight_disable_path
> > sequence is running and gets to the point of disabling the ETR after
> > the SMMU callback has disabled it.
>
> I'm confused now - there is no "SMMU callback", we're talking about the
> system-wide cleanup from kernel_shutdown_prepare() or
> kernel_restart_prepare(). As far as I'm aware userspace should be long
> gone by that point, so although trace may have been left running ||
           ((offset >= TRCCIDCVRn(0)) && (offset <= TRCVMIDCVRn(7)), the
> chance of racing against other driver operations seems pretty unlikely.
>

Sorry - bad choice of terminology. I was referring to the SMMU
ensuring that it had all its clients shut-down before if shut down. To
quote Sai...

>>>>> SMMU device link just makes sure that
> >>>>>> SMMU(supplier) shutdown is
> >>>>>> called only after its consumers shutdown callbacks are called.

I agree it is unlikely, but if removing the device from the CoreSight
infrastructure via coresight_unregister() is a potential source of a
crash, it would seem that there is a potential path where some
CoreSight driver side work might be possible. therefore a mode to
prevent this crash, and ensure that the device hardware remains off
and not sending trace to SMMU until such time as shutdown / reboot
restart occurs, seemed prudent.

Mike

> Robin.



--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
Sai Prakash Ranjan June 3, 2020, 2:02 p.m. UTC | #19
Hi Mike,

On 2020-06-03 19:21, Mike Leach wrote:
> Hi,
> 
> 
> On Wed, 3 Jun 2020 at 14:34, Robin Murphy <robin.murphy@arm.com> wrote:
>> 
>> On 2020-06-03 14:22, Mike Leach wrote:
>> > Hi Sai,
>> >
>> > On Wed, 3 Jun 2020 at 13:14, Sai Prakash Ranjan
>> > <saiprakash.ranjan@codeaurora.org> wrote:
>> >>
>> >> Hi Mike,
>> >>
>> >> On 2020-06-03 16:57, Mike Leach wrote:
>> >>> Hi,
>> >>>
>> >>> On Wed, 3 Jun 2020 at 11:24, Sai Prakash Ranjan
>> >>> <saiprakash.ranjan@codeaurora.org> wrote:
>> >>>>
>> >>>> Hi Mike,
>> >>>>
>> >>>> Thanks again for looking at this.
>> >>>>
>> >>>> On 2020-06-03 03:42, Mike Leach wrote:
>> >>>> [...]
>> >>>>
>> >>>>>>
>> >>>>>> SMMU/IOMMU won't be able to do much here as it is the client's
>> >>>>>> responsiblity to
>> >>>>>> properly shutdown and SMMU device link just makes sure that
>> >>>>>> SMMU(supplier) shutdown is
>> >>>>>> called only after its consumers shutdown callbacks are called.
>> >>>>>
>> >>>>> I think this use case can be handled slightly differently than the
>> >>>>> general requirements for modular CoreSight drivers.
>> >>>>>
>> >>>>> What is needed here is a way of stopping the underlying ETR hardware
>> >>>>> from issuing data to the SMMU, until the entire device has been shut
>> >>>>> down, in a way that does not remove the driver, breaking existing
>> >>>>> references and causing a system crash.
>> >>>>>
>> >>>>> We could introduce a new mode to the ETR driver - e.g.
>> >>>>> CS_MODE_SHUTDOWN.
>> >>>>>
>> >>>>> At the end of the block tmc_shutdown(struct amba_device *adev), set
>> >>>>> drvdata->mode to CS_MODE_SHUTDOWN & remove the coresight_unregister().
>> >>>>> This new mode can be used to  prevent the underlying hardware from
>> >>>>> being able to restart until the device is re-powered.
>> >>>>>
>> >>>>> This mode can be detected in the code that enables / disables the ETR
>> >>>>> and handled appropriately (updates to tmc_enable_etr_sink and
>> >>>>> tmc_disable_etr_sink).
>> >>>>> This mode will persist until the device is re-started - but because we
>> >>>>> are on the device shutdown path this is not an issue.
>> >>>>>
>> >>>>> This should leave the CoreSight infrastructure stable until the
>> >>>>> drivers are shut down normally as part of the device power down
>> >>>>> process.
>> >>>>>
>> >>>>
>> >>>> Sounds good to me, but if the coresight_unregister() is the trouble
>> >>>> point
>> >>>> causing these crashes, then can't we just remove that from
>> >>>> tmc_shutdown()
>> >>>> callback? This would be like maintaining the same behaviour as now
>> >>>> where
>> >>>> on reboot/shutdown we basically don't do anything except for disabling
>> >>>> ETR.
>> >>>
>> >>> No - the new mode prevents race conditions where the thread shutting
>> >>> down the SMMU does the ETR shutdown, but then another thread happens
>> >>> to be trying to start trace and restarts the ETR.
>> >>> It also prevents the condition Mathieu discussed where a thread might
>> >>> be attempting to shutdown trace - this could try to disable the
>> >>> hardware again re-releasing resources/ re-flushing and waiting for
>> >>> stop.
>> >>>
>> >>
>> >> I do not think there will a race between SMMU shutdown and ETR shutdown.
>> >> Driver core takes care of calling SMMU shutdown after its consumer
>> >> shutdown callbacks via device link, otherwise there would already be
>> >> bugs in all other client drivers.
>> >>
>> >
>> > I am not saying there could be a race between tmc_shutdowm and
>> > Smmu_shutdown - there may be a case if the coresight_disable_path
>> > sequence is running and gets to the point of disabling the ETR after
>> > the SMMU callback has disabled it.
>> 
>> I'm confused now - there is no "SMMU callback", we're talking about 
>> the
>> system-wide cleanup from kernel_shutdown_prepare() or
>> kernel_restart_prepare(). As far as I'm aware userspace should be long
>> gone by that point, so although trace may have been left running ||
>            ((offset >= TRCCIDCVRn(0)) && (offset <= TRCVMIDCVRn(7)), 
> the
>> chance of racing against other driver operations seems pretty 
>> unlikely.
>> 
> 
> Sorry - bad choice of terminology. I was referring to the SMMU
> ensuring that it had all its clients shut-down before if shut down. To
> quote Sai...
> 
>>>>>> SMMU device link just makes sure that
>> >>>>>> SMMU(supplier) shutdown is
>> >>>>>> called only after its consumers shutdown callbacks are called.
> 
> I agree it is unlikely, but if removing the device from the CoreSight
> infrastructure via coresight_unregister() is a potential source of a
> crash, it would seem that there is a potential path where some
> CoreSight driver side work might be possible. therefore a mode to
> prevent this crash, and ensure that the device hardware remains off
> and not sending trace to SMMU until such time as shutdown / reboot
> restart occurs, seemed prudent.
> 

Actually I did not see any crash with coresight_unregister() during
reboot/shutdown as I mentioned previously to Mathieu's query on
this being similar to remove callback. I think the crash with
coresight_unregister() is only seen when we have coresight as module
and the userspace is pretty much there to enable/disable trace when
we try to bind/unbind. But here we only consider the system 
reboot/shutdown
where pretty much everything is down by this point.

Thanks,
Sai
Mathieu Poirier June 3, 2020, 5:44 p.m. UTC | #20
On Wed, Jun 03, 2020 at 02:34:10PM +0100, Robin Murphy wrote:
> On 2020-06-03 14:22, Mike Leach wrote:
> > Hi Sai,
> > 
> > On Wed, 3 Jun 2020 at 13:14, Sai Prakash Ranjan
> > <saiprakash.ranjan@codeaurora.org> wrote:
> > > 
> > > Hi Mike,
> > > 
> > > On 2020-06-03 16:57, Mike Leach wrote:
> > > > Hi,
> > > > 
> > > > On Wed, 3 Jun 2020 at 11:24, Sai Prakash Ranjan
> > > > <saiprakash.ranjan@codeaurora.org> wrote:
> > > > > 
> > > > > Hi Mike,
> > > > > 
> > > > > Thanks again for looking at this.
> > > > > 
> > > > > On 2020-06-03 03:42, Mike Leach wrote:
> > > > > [...]
> > > > > 
> > > > > > > 
> > > > > > > SMMU/IOMMU won't be able to do much here as it is the client's
> > > > > > > responsiblity to
> > > > > > > properly shutdown and SMMU device link just makes sure that
> > > > > > > SMMU(supplier) shutdown is
> > > > > > > called only after its consumers shutdown callbacks are called.
> > > > > > 
> > > > > > I think this use case can be handled slightly differently than the
> > > > > > general requirements for modular CoreSight drivers.
> > > > > > 
> > > > > > What is needed here is a way of stopping the underlying ETR hardware
> > > > > > from issuing data to the SMMU, until the entire device has been shut
> > > > > > down, in a way that does not remove the driver, breaking existing
> > > > > > references and causing a system crash.
> > > > > > 
> > > > > > We could introduce a new mode to the ETR driver - e.g.
> > > > > > CS_MODE_SHUTDOWN.
> > > > > > 
> > > > > > At the end of the block tmc_shutdown(struct amba_device *adev), set
> > > > > > drvdata->mode to CS_MODE_SHUTDOWN & remove the coresight_unregister().
> > > > > > This new mode can be used to  prevent the underlying hardware from
> > > > > > being able to restart until the device is re-powered.
> > > > > > 
> > > > > > This mode can be detected in the code that enables / disables the ETR
> > > > > > and handled appropriately (updates to tmc_enable_etr_sink and
> > > > > > tmc_disable_etr_sink).
> > > > > > This mode will persist until the device is re-started - but because we
> > > > > > are on the device shutdown path this is not an issue.
> > > > > > 
> > > > > > This should leave the CoreSight infrastructure stable until the
> > > > > > drivers are shut down normally as part of the device power down
> > > > > > process.
> > > > > > 
> > > > > 
> > > > > Sounds good to me, but if the coresight_unregister() is the trouble
> > > > > point
> > > > > causing these crashes, then can't we just remove that from
> > > > > tmc_shutdown()
> > > > > callback? This would be like maintaining the same behaviour as now
> > > > > where
> > > > > on reboot/shutdown we basically don't do anything except for disabling
> > > > > ETR.
> > > > 
> > > > No - the new mode prevents race conditions where the thread shutting
> > > > down the SMMU does the ETR shutdown, but then another thread happens
> > > > to be trying to start trace and restarts the ETR.
> > > > It also prevents the condition Mathieu discussed where a thread might
> > > > be attempting to shutdown trace - this could try to disable the
> > > > hardware again re-releasing resources/ re-flushing and waiting for
> > > > stop.
> > > > 
> > > 
> > > I do not think there will a race between SMMU shutdown and ETR shutdown.
> > > Driver core takes care of calling SMMU shutdown after its consumer
> > > shutdown callbacks via device link, otherwise there would already be
> > > bugs in all other client drivers.
> > > 
> > 
> > I am not saying there could be a race between tmc_shutdowm and
> > Smmu_shutdown - there may be a case if the coresight_disable_path
> > sequence is running and gets to the point of disabling the ETR after
> > the SMMU callback has disabled it.
> 
> I'm confused now - there is no "SMMU callback", we're talking about the
> system-wide cleanup from kernel_shutdown_prepare() or
> kernel_restart_prepare(). As far as I'm aware userspace should be long gone
> by that point, so although trace may have been left running, the chance of
> racing against other driver operations seems pretty unlikely.

Robin has a point - user space is long gone at this time.  As such the first 
question to ask is what kind of CS session was running at the time the system
was shutting down.  Was it a perf session of a sysfs session?

I'm guessing it was a sysfs session because user space has been blown away a
while back and part of that process should have killed all perf sessions.

If I am correct then simply switching off the ETR HW in the shutdown() amba bus
callback should be fine - otherwise Mike's approach is mandatory.  There is 
also the exchange between Robin and Sai about removing the SMMU shutdown
callback, but that thread is still incomplete. 

Thanks,
Mathieu

> 
> Robin.
Sai Prakash Ranjan June 4, 2020, 7:27 a.m. UTC | #21
Hi Mathieu,

+Will

On 2020-06-03 23:14, Mathieu Poirier wrote:
> On Wed, Jun 03, 2020 at 02:34:10PM +0100, Robin Murphy wrote:
>> On 2020-06-03 14:22, Mike Leach wrote:
>> > Hi Sai,
>> >
>> > On Wed, 3 Jun 2020 at 13:14, Sai Prakash Ranjan
>> > <saiprakash.ranjan@codeaurora.org> wrote:
>> > >
>> > > Hi Mike,
>> > >
>> > > On 2020-06-03 16:57, Mike Leach wrote:
>> > > > Hi,
>> > > >
>> > > > On Wed, 3 Jun 2020 at 11:24, Sai Prakash Ranjan
>> > > > <saiprakash.ranjan@codeaurora.org> wrote:
>> > > > >
>> > > > > Hi Mike,
>> > > > >
>> > > > > Thanks again for looking at this.
>> > > > >
>> > > > > On 2020-06-03 03:42, Mike Leach wrote:
>> > > > > [...]
>> > > > >
>> > > > > > >
>> > > > > > > SMMU/IOMMU won't be able to do much here as it is the client's
>> > > > > > > responsiblity to
>> > > > > > > properly shutdown and SMMU device link just makes sure that
>> > > > > > > SMMU(supplier) shutdown is
>> > > > > > > called only after its consumers shutdown callbacks are called.
>> > > > > >
>> > > > > > I think this use case can be handled slightly differently than the
>> > > > > > general requirements for modular CoreSight drivers.
>> > > > > >
>> > > > > > What is needed here is a way of stopping the underlying ETR hardware
>> > > > > > from issuing data to the SMMU, until the entire device has been shut
>> > > > > > down, in a way that does not remove the driver, breaking existing
>> > > > > > references and causing a system crash.
>> > > > > >
>> > > > > > We could introduce a new mode to the ETR driver - e.g.
>> > > > > > CS_MODE_SHUTDOWN.
>> > > > > >
>> > > > > > At the end of the block tmc_shutdown(struct amba_device *adev), set
>> > > > > > drvdata->mode to CS_MODE_SHUTDOWN & remove the coresight_unregister().
>> > > > > > This new mode can be used to  prevent the underlying hardware from
>> > > > > > being able to restart until the device is re-powered.
>> > > > > >
>> > > > > > This mode can be detected in the code that enables / disables the ETR
>> > > > > > and handled appropriately (updates to tmc_enable_etr_sink and
>> > > > > > tmc_disable_etr_sink).
>> > > > > > This mode will persist until the device is re-started - but because we
>> > > > > > are on the device shutdown path this is not an issue.
>> > > > > >
>> > > > > > This should leave the CoreSight infrastructure stable until the
>> > > > > > drivers are shut down normally as part of the device power down
>> > > > > > process.
>> > > > > >
>> > > > >
>> > > > > Sounds good to me, but if the coresight_unregister() is the trouble
>> > > > > point
>> > > > > causing these crashes, then can't we just remove that from
>> > > > > tmc_shutdown()
>> > > > > callback? This would be like maintaining the same behaviour as now
>> > > > > where
>> > > > > on reboot/shutdown we basically don't do anything except for disabling
>> > > > > ETR.
>> > > >
>> > > > No - the new mode prevents race conditions where the thread shutting
>> > > > down the SMMU does the ETR shutdown, but then another thread happens
>> > > > to be trying to start trace and restarts the ETR.
>> > > > It also prevents the condition Mathieu discussed where a thread might
>> > > > be attempting to shutdown trace - this could try to disable the
>> > > > hardware again re-releasing resources/ re-flushing and waiting for
>> > > > stop.
>> > > >
>> > >
>> > > I do not think there will a race between SMMU shutdown and ETR shutdown.
>> > > Driver core takes care of calling SMMU shutdown after its consumer
>> > > shutdown callbacks via device link, otherwise there would already be
>> > > bugs in all other client drivers.
>> > >
>> >
>> > I am not saying there could be a race between tmc_shutdowm and
>> > Smmu_shutdown - there may be a case if the coresight_disable_path
>> > sequence is running and gets to the point of disabling the ETR after
>> > the SMMU callback has disabled it.
>> 
>> I'm confused now - there is no "SMMU callback", we're talking about 
>> the
>> system-wide cleanup from kernel_shutdown_prepare() or
>> kernel_restart_prepare(). As far as I'm aware userspace should be long 
>> gone
>> by that point, so although trace may have been left running, the 
>> chance of
>> racing against other driver operations seems pretty unlikely.
> 
> Robin has a point - user space is long gone at this time.  As such the 
> first
> question to ask is what kind of CS session was running at the time the 
> system
> was shutting down.  Was it a perf session of a sysfs session?
> 
> I'm guessing it was a sysfs session because user space has been blown 
> away a
> while back and part of that process should have killed all perf 
> sessions.

I was enabling trace via sysfs.

> 
> If I am correct then simply switching off the ETR HW in the shutdown() 
> amba bus
> callback should be fine - otherwise Mike's approach is mandatory.  
> There is
> also the exchange between Robin and Sai about removing the SMMU 
> shutdown
> callback, but that thread is still incomplete.
> 

If Robin is hinting at removing SMMU shutdown callback, then I think 
adding
all these shutdown callbacks to all clients of SMMU can be avoided. Git 
blaming
the thing shows it was added to avoid some kexec memory corruption.

Thanks,
Sai
Sai Prakash Ranjan June 8, 2020, 2:07 p.m. UTC | #22
Hi Mathieu, Mike

On 2020-06-04 12:57, Sai Prakash Ranjan wrote:
> 

[...]

>> 
>> Robin has a point - user space is long gone at this time.  As such the 
>> first
>> question to ask is what kind of CS session was running at the time the 
>> system
>> was shutting down.  Was it a perf session of a sysfs session?
>> 
>> I'm guessing it was a sysfs session because user space has been blown 
>> away a
>> while back and part of that process should have killed all perf 
>> sessions.
> 
> I was enabling trace via sysfs.
> 
>> 
>> If I am correct then simply switching off the ETR HW in the shutdown() 
>> amba bus
>> callback should be fine - otherwise Mike's approach is mandatory.  
>> There is
>> also the exchange between Robin and Sai about removing the SMMU 
>> shutdown
>> callback, but that thread is still incomplete.
>> 
> 
> If Robin is hinting at removing SMMU shutdown callback, then I think 
> adding
> all these shutdown callbacks to all clients of SMMU can be avoided. Git 
> blaming
> the thing shows it was added to avoid some kexec memory corruption.
> 

I think I misread the cryptic hint from Robin and it is not right to 
remove
SMMU shutdown callback. For more details on why that was a bad idea and 
would
break kexec, please refer to [1].

As for the coresight, can I disable the ETR only in the tmc shutdown 
callback
or are we still concerned about the userspace coming into picture?

[1] https://lore.kernel.org/patchwork/patch/1253131/

Thanks,
Sai
Mathieu Poirier June 9, 2020, 3:27 p.m. UTC | #23
On Mon, 8 Jun 2020 at 08:07, Sai Prakash Ranjan
<saiprakash.ranjan@codeaurora.org> wrote:
>
> Hi Mathieu, Mike
>
> On 2020-06-04 12:57, Sai Prakash Ranjan wrote:
> >
>
> [...]
>
> >>
> >> Robin has a point - user space is long gone at this time.  As such the
> >> first
> >> question to ask is what kind of CS session was running at the time the
> >> system
> >> was shutting down.  Was it a perf session of a sysfs session?
> >>
> >> I'm guessing it was a sysfs session because user space has been blown
> >> away a
> >> while back and part of that process should have killed all perf
> >> sessions.
> >
> > I was enabling trace via sysfs.
> >
> >>
> >> If I am correct then simply switching off the ETR HW in the shutdown()
> >> amba bus
> >> callback should be fine - otherwise Mike's approach is mandatory.
> >> There is
> >> also the exchange between Robin and Sai about removing the SMMU
> >> shutdown
> >> callback, but that thread is still incomplete.
> >>
> >
> > If Robin is hinting at removing SMMU shutdown callback, then I think
> > adding
> > all these shutdown callbacks to all clients of SMMU can be avoided. Git
> > blaming
> > the thing shows it was added to avoid some kexec memory corruption.
> >
>
> I think I misread the cryptic hint from Robin and it is not right to
> remove
> SMMU shutdown callback. For more details on why that was a bad idea and
> would
> break kexec, please refer to [1].
>
> As for the coresight, can I disable the ETR only in the tmc shutdown
> callback
> or are we still concerned about the userspace coming into picture?

User space isn't a concern, especially after you've confirmed the
problem occured during an ongoing sysfs session.

>
> [1] https://lore.kernel.org/patchwork/patch/1253131/
>
> Thanks,
> Sai
>
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
> member
> of Code Aurora Forum, hosted by The Linux Foundation
Sai Prakash Ranjan June 9, 2020, 3:37 p.m. UTC | #24
Hi Mathieu,

On 2020-06-09 20:57, Mathieu Poirier wrote:
> On Mon, 8 Jun 2020 at 08:07, Sai Prakash Ranjan
> <saiprakash.ranjan@codeaurora.org> wrote:
>> 
>> Hi Mathieu, Mike
>> 
>> On 2020-06-04 12:57, Sai Prakash Ranjan wrote:
>> >
>> 
>> [...]
>> 
>> >>
>> >> Robin has a point - user space is long gone at this time.  As such the
>> >> first
>> >> question to ask is what kind of CS session was running at the time the
>> >> system
>> >> was shutting down.  Was it a perf session of a sysfs session?
>> >>
>> >> I'm guessing it was a sysfs session because user space has been blown
>> >> away a
>> >> while back and part of that process should have killed all perf
>> >> sessions.
>> >
>> > I was enabling trace via sysfs.
>> >
>> >>
>> >> If I am correct then simply switching off the ETR HW in the shutdown()
>> >> amba bus
>> >> callback should be fine - otherwise Mike's approach is mandatory.
>> >> There is
>> >> also the exchange between Robin and Sai about removing the SMMU
>> >> shutdown
>> >> callback, but that thread is still incomplete.
>> >>
>> >
>> > If Robin is hinting at removing SMMU shutdown callback, then I think
>> > adding
>> > all these shutdown callbacks to all clients of SMMU can be avoided. Git
>> > blaming
>> > the thing shows it was added to avoid some kexec memory corruption.
>> >
>> 
>> I think I misread the cryptic hint from Robin and it is not right to
>> remove
>> SMMU shutdown callback. For more details on why that was a bad idea 
>> and
>> would
>> break kexec, please refer to [1].
>> 
>> As for the coresight, can I disable the ETR only in the tmc shutdown
>> callback
>> or are we still concerned about the userspace coming into picture?
> 
> User space isn't a concern, especially after you've confirmed the
> problem occured during an ongoing sysfs session.
> 

Will post v3 with comments addressed after 5.8-rc1 is out.

Thanks,
Sai
diff mbox series

Patch

diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
index 36cce2bfb744..cba3e7592820 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
@@ -85,7 +85,7 @@  static void __tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
 	CS_LOCK(drvdata->base);
 }
 
-static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
+void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
 {
 	__tmc_etb_disable_hw(drvdata);
 	coresight_disclaim_device(drvdata->base);
@@ -118,7 +118,7 @@  static int tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
 	return 0;
 }
 
-static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
+void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
 {
 	CS_UNLOCK(drvdata->base);
 
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 625882bc8b08..b29c2db94d96 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -1110,7 +1110,7 @@  static void __tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
 
 }
 
-static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
+void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
 {
 	__tmc_etr_disable_hw(drvdata);
 	/* Disable CATU device if this ETR is connected to one */
diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c
index 5a271ebc4585..7e687a356fe0 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.c
+++ b/drivers/hwtracing/coresight/coresight-tmc.c
@@ -540,6 +540,34 @@  static int tmc_probe(struct amba_device *adev, const struct amba_id *id)
 	return ret;
 }
 
+static void tmc_shutdown(struct amba_device *adev)
+{
+	struct tmc_drvdata *drvdata = amba_get_drvdata(adev);
+
+	if (!drvdata->enable)
+		goto out;
+
+	/*
+	 * We do not care about the active trace sessions here
+	 * since the system is going down unlike remove callback,
+	 * just make sure that the hardware is shutdown.
+	 */
+	switch (drvdata->config_type) {
+	case TMC_CONFIG_TYPE_ETB:
+		tmc_etb_disable_hw(drvdata);
+		break;
+	case TMC_CONFIG_TYPE_ETF:
+		tmc_etf_disable_hw(drvdata);
+		break;
+	case TMC_CONFIG_TYPE_ETR:
+		tmc_etr_disable_hw(drvdata);
+	}
+
+out:
+	misc_deregister(&drvdata->miscdev);
+	coresight_unregister(drvdata->csdev);
+}
+
 static const struct amba_id tmc_ids[] = {
 	CS_AMBA_ID(0x000bb961),
 	/* Coresight SoC 600 TMC-ETR/ETS */
@@ -558,6 +586,7 @@  static struct amba_driver tmc_driver = {
 		.suppress_bind_attrs = true,
 	},
 	.probe		= tmc_probe,
+	.shutdown	= tmc_shutdown,
 	.id_table	= tmc_ids,
 };
 builtin_amba_driver(tmc_driver);
diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
index d156860495c7..f4f56c474e58 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.h
+++ b/drivers/hwtracing/coresight/coresight-tmc.h
@@ -262,6 +262,8 @@  u32 tmc_get_memwidth_mask(struct tmc_drvdata *drvdata);
 /* ETB/ETF functions */
 int tmc_read_prepare_etb(struct tmc_drvdata *drvdata);
 int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata);
+void tmc_etb_disable_hw(struct tmc_drvdata *drvdata);
+void tmc_etf_disable_hw(struct tmc_drvdata *drvdata);
 extern const struct coresight_ops tmc_etb_cs_ops;
 extern const struct coresight_ops tmc_etf_cs_ops;
 
@@ -270,6 +272,7 @@  ssize_t tmc_etb_get_sysfs_trace(struct tmc_drvdata *drvdata,
 /* ETR functions */
 int tmc_read_prepare_etr(struct tmc_drvdata *drvdata);
 int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata);
+void tmc_etr_disable_hw(struct tmc_drvdata *drvdata);
 extern const struct coresight_ops tmc_etr_cs_ops;
 ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata,
 				loff_t pos, size_t len, char **bufpp);