diff mbox series

[v1,5/5] coresight: etm4x: save/restore state across CPU low power states

Message ID 20190618125433.9739-6-andrew.murray@arm.com (mailing list archive)
State New, archived
Headers show
Series coresight: etm4x: save/restore ETMv4 context across CPU low power states | expand

Commit Message

Andrew Murray June 18, 2019, 12:54 p.m. UTC
Some hardware will ignore bit TRCPDCR.PU which is used to signal
to hardware that power should not be removed from the trace unit.
Let's mitigate against this by saving and restoring the trace
unit state when the CPU enters low power states.

To provide the benefit to both self-hosted and external debuggers
we save/restore the entire state which includes etmv4_config data
and dynamic data such as inflight counter values, sequencer
states, etc.

To reduce CPU suspend/resume latency the state is only saved or
restored if coresight is in use as determined by the claimset
registers.

To aid debug of CPU suspend/resume a disable_pm_save parameter
is provided to disable this feature.

Signed-off-by: Andrew Murray <andrew.murray@arm.com>
---
 drivers/hwtracing/coresight/coresight-etm4x.c | 245 ++++++++++++++++++
 drivers/hwtracing/coresight/coresight-etm4x.h |  66 ++++-
 drivers/hwtracing/coresight/coresight.c       |   2 +-
 include/linux/coresight.h                     |   7 +
 4 files changed, 318 insertions(+), 2 deletions(-)

Comments

Sudeep Holla June 18, 2019, 1:21 p.m. UTC | #1
On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> Some hardware will ignore bit TRCPDCR.PU which is used to signal
> to hardware that power should not be removed from the trace unit.

So, how or can we identify or discover such system ? DT/ACPI ?

> Let's mitigate against this by saving and restoring the trace
> unit state when the CPU enters low power states.
>

I prefer to do this conditionally. It's unnecessary on systems which
don't ignore the TRCPDCR.PU and I really don't like them to be penalised
while we want to add this support for *broken* systems.

This is generally most useful to debug CPU suspend/resume exercising
the code path completely with emulated CPU power on/off as most of the
systems have the trace unit and the CPUs in the same power domain.

Just curious if this reported on any platforms ?

I wounder if we can use TRCPDSR(Power Down Status Register) to check the
status. I know on Juno, it doesn't loose context rather the power down
is emulated and saving/restoring may not be needed at all. Have you
tested on Juno with and without these patches and seen any difference ?

--
Regards,
Sudeep
Mathieu Poirier June 18, 2019, 10:55 p.m. UTC | #2
On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> Some hardware will ignore bit TRCPDCR.PU which is used to signal
> to hardware that power should not be removed from the trace unit.
> Let's mitigate against this by saving and restoring the trace
> unit state when the CPU enters low power states.
> 
> To provide the benefit to both self-hosted and external debuggers
> we save/restore the entire state which includes etmv4_config data
> and dynamic data such as inflight counter values, sequencer
> states, etc.
> 
> To reduce CPU suspend/resume latency the state is only saved or
> restored if coresight is in use as determined by the claimset
> registers.
> 
> To aid debug of CPU suspend/resume a disable_pm_save parameter
> is provided to disable this feature.
> 
> Signed-off-by: Andrew Murray <andrew.murray@arm.com>
> ---
>  drivers/hwtracing/coresight/coresight-etm4x.c | 245 ++++++++++++++++++
>  drivers/hwtracing/coresight/coresight-etm4x.h |  66 ++++-
>  drivers/hwtracing/coresight/coresight.c       |   2 +-
>  include/linux/coresight.h                     |   7 +
>  4 files changed, 318 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
> index bda90d4cd62b..d27c5e0d9aec 100644
> --- a/drivers/hwtracing/coresight/coresight-etm4x.c
> +++ b/drivers/hwtracing/coresight/coresight-etm4x.c
> @@ -18,6 +18,7 @@
>  #include <linux/stat.h>
>  #include <linux/clk.h>
>  #include <linux/cpu.h>
> +#include <linux/cpu_pm.h>
>  #include <linux/coresight.h>
>  #include <linux/coresight-pmu.h>
>  #include <linux/pm_wakeup.h>
> @@ -36,6 +37,9 @@
>  static int boot_enable;
>  module_param_named(boot_enable, boot_enable, int, 0444);
>  
> +static int disable_pm_save;
> +module_param_named(disable_pm_save, disable_pm_save, int, 0444);
> +
>  /* The number of ETMv4 currently registered */
>  static int etm4_count;
>  static struct etmv4_drvdata *etmdrvdata[NR_CPUS];
> @@ -53,6 +57,14 @@ static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
>  	isb();
>  }
>  
> +static void etm4_os_lock(struct etmv4_drvdata *drvdata)
> +{
> +	/* Writing 0x1 to TRCOSLAR unlocks the trace registers */
> +	writel_relaxed(0x1, drvdata->base + TRCOSLAR);
> +	drvdata->os_unlock = false;
> +	isb();
> +}
> +
>  static bool etm4_arch_supported(u8 arch)
>  {
>  	/* Mask out the minor version number */
> @@ -1076,6 +1088,235 @@ static void etm4_init_trace_id(struct etmv4_drvdata *drvdata)
>  	drvdata->trcid = coresight_get_trace_id(drvdata->cpu);
>  }
>  
> +#ifdef CONFIG_CPU_PM
> +static void etm4_cpu_save(struct etmv4_drvdata *drvdata)
> +{
> +	int i;
> +	u32 control;
> +	struct etmv4_save_state *state;

Before going any further I would make sure the CPU this is running on it equal
to drvdata->cpu.  Otherwise something very wrong happened.

> +
> +	/* As recommended by 3.4.1 of ARM IHI 0064D */
> +	dsb(sy);
> +	isb();
> +
> +	CS_UNLOCK(drvdata->base);
> +	etm4_os_lock(drvdata);

Please add a comment to explain that you are using the OS lock to disable
external debugger access to the trace registers while the unit is powered down.
Otherwise people will get confused and will submit patches that changes
etm4_os_lock() to etm4_os_unlock().

> +
> +	/* wait for TRCSTATR.PMSTABLE to go up */
> +	if (coresight_timeout(drvdata->base, TRCSTATR,
> +					TRCSTATR_PMSTABLE_BIT, 1))
> +		dev_err(drvdata->dev,
> +			"timeout while waiting for Idle Trace Status\n");

The above comment is not accurate since we are waiting for the PMSTABLE bit.

> +
> +	state = &drvdata->save_state;
> +
> +	state->trcprgctlr = readl(drvdata->base + TRCPRGCTLR);
> +	state->trcprocselr = readl(drvdata->base + TRCPROCSELR);
> +	state->trcconfigr = readl(drvdata->base + TRCCONFIGR);
> +	state->trcauxctlr = readl(drvdata->base + TRCAUXCTLR);
> +	state->trceventctl0r = readl(drvdata->base + TRCEVENTCTL0R);
> +	state->trceventctl1r = readl(drvdata->base + TRCEVENTCTL1R);
> +	state->trcstallctlr = readl(drvdata->base + TRCSTALLCTLR);
> +	state->trctsctlr = readl(drvdata->base + TRCTSCTLR);
> +	state->trcsyncpr = readl(drvdata->base + TRCSYNCPR);
> +	state->trcccctlr = readl(drvdata->base + TRCCCCTLR);
> +	state->trcbbctlr = readl(drvdata->base + TRCBBCTLR);
> +	state->trctraceidr = readl(drvdata->base + TRCTRACEIDR);
> +	state->trcqctlr = readl(drvdata->base + TRCQCTLR);
> +
> +	state->trcvictlr = readl(drvdata->base + TRCVICTLR);
> +	state->trcviiectlr = readl(drvdata->base + TRCVIIECTLR);
> +	state->trcvissctlr = readl(drvdata->base + TRCVISSCTLR);
> +	state->trcvipcssctlr = readl(drvdata->base + TRCVIPCSSCTLR);
> +	state->trcvdctlr = readl(drvdata->base + TRCVDCTLR);
> +	state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR);
> +	state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR);
> +
> +	for (i = 0; i < drvdata->nrseqstate; i++)
> +		state->trcseqevr[i] = readl(drvdata->base + TRCSEQEVRn(i));
> +
> +	state->trcseqrstevr = readl(drvdata->base + TRCSEQRSTEVR);
> +	state->trcseqstr = readl(drvdata->base + TRCSEQSTR);
> +	state->trcextinselr = readl(drvdata->base + TRCEXTINSELR);
> +
> +	for (i = 0; i < drvdata->nr_cntr; i++) {
> +		state->trccntrldvr[i] = readl(drvdata->base + TRCCNTRLDVRn(i));
> +		state->trccntctlr[i] = readl(drvdata->base + TRCCNTCTLRn(i));
> +		state->trccntvr[i] = readl(drvdata->base + TRCCNTVRn(i));
> +	}
> +
> +	for (i = 0; i < drvdata->nr_resource * 2; i++)
> +		state->trcrsctlr[i] = readl(drvdata->base + TRCRSCTLRn(i));
> +
> +	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
> +		state->trcssccr[i] = readl(drvdata->base + TRCSSCCRn(i));
> +		state->trcsscsr[i] = readl(drvdata->base + TRCSSCSRn(i));
> +		state->trcsspcicr[i] = readl(drvdata->base + TRCSSPCICRn(i));
> +	}
> +
> +	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> +		state->trcacvr[i] = readl(drvdata->base + TRCACVRn(i));
> +		state->trcacatr[i] = readl(drvdata->base + TRCACATRn(i));
> +	}
> +
> +	for (i = 0; i < drvdata->numcidc; i++)
> +		state->trccidcvr[i] = readl(drvdata->base + TRCCIDCVRn(i));
> +
> +	for (i = 0; i < drvdata->numvmidc; i++)
> +		state->trcvmidcvr[i] = readl(drvdata->base + TRCVMIDCVRn(i));
> +
> +	state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0);
> +	state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1);
> +
> +	state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0);
> +	state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR1);
> +
> +	state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR);
> +
> +	/* wait for TRCSTATR.IDLE to go up */
> +	if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
> +		dev_err(drvdata->dev,
> +			"timeout while waiting for Idle Trace Status\n");
> +
> +	/* power can be removed from the trace unit now */
> +	control = readl_relaxed(drvdata->base + TRCPDCR);
> +	control &= ~TRCPDCR_PU;
> +	writel_relaxed(control, drvdata->base + TRCPDCR);
> +
> +	CS_LOCK(drvdata->base);
> +}
> +
> +static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
> +{
> +	int i;
> +	struct etmv4_save_state *state;
> +
> +	state = &drvdata->save_state;

Same comment as above about the running CPU.

> +
> +	CS_UNLOCK(drvdata->base);
> +
> +	writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
> +
> +	writel_relaxed(state->trcprgctlr, drvdata->base + TRCPRGCTLR);
> +	writel_relaxed(state->trcprocselr, drvdata->base + TRCPROCSELR);
> +	writel_relaxed(state->trcconfigr, drvdata->base + TRCCONFIGR);
> +	writel_relaxed(state->trcauxctlr, drvdata->base + TRCAUXCTLR);
> +	writel_relaxed(state->trceventctl0r, drvdata->base + TRCEVENTCTL0R);
> +	writel_relaxed(state->trceventctl1r, drvdata->base + TRCEVENTCTL1R);
> +	writel_relaxed(state->trcstallctlr, drvdata->base + TRCSTALLCTLR);
> +	writel_relaxed(state->trctsctlr, drvdata->base + TRCTSCTLR);
> +	writel_relaxed(state->trcsyncpr, drvdata->base + TRCSYNCPR);
> +	writel_relaxed(state->trcccctlr, drvdata->base + TRCCCCTLR);
> +	writel_relaxed(state->trcbbctlr, drvdata->base + TRCBBCTLR);
> +	writel_relaxed(state->trctraceidr, drvdata->base + TRCTRACEIDR);
> +	writel_relaxed(state->trcqctlr, drvdata->base + TRCQCTLR);
> +
> +	writel_relaxed(state->trcvictlr, drvdata->base + TRCVICTLR);
> +	writel_relaxed(state->trcviiectlr, drvdata->base + TRCVIIECTLR);
> +	writel_relaxed(state->trcvissctlr, drvdata->base + TRCVISSCTLR);
> +	writel_relaxed(state->trcvipcssctlr, drvdata->base + TRCVIPCSSCTLR);
> +	writel_relaxed(state->trcvdctlr, drvdata->base + TRCVDCTLR);
> +	writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR);
> +	writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR);
> +
> +	for (i = 0; i < drvdata->nrseqstate; i++)
> +		writel_relaxed(state->trcseqevr[i],
> +					drvdata->base + TRCSEQEVRn(i));
> +
> +	writel_relaxed(state->trcseqrstevr, drvdata->base + TRCSEQRSTEVR);
> +	writel_relaxed(state->trcseqstr, drvdata->base + TRCSEQSTR);
> +	writel_relaxed(state->trcextinselr, drvdata->base + TRCEXTINSELR);
> +
> +	for (i = 0; i < drvdata->nr_cntr; i++) {
> +		writel_relaxed(state->trccntrldvr[i],
> +					drvdata->base + TRCCNTRLDVRn(i));
> +		writel_relaxed(state->trccntctlr[i],
> +					drvdata->base + TRCCNTCTLRn(i));
> +		writel_relaxed(state->trccntvr[i],
> +					drvdata->base + TRCCNTVRn(i));
> +	}
> +
> +	for (i = 0; i < drvdata->nr_resource * 2; i++)
> +		writel_relaxed(state->trcrsctlr[i],
> +					drvdata->base + TRCRSCTLRn(i));
> +
> +	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
> +		writel_relaxed(state->trcssccr[i],
> +					drvdata->base + TRCSSCCRn(i));
> +		writel_relaxed(state->trcsscsr[i],
> +					drvdata->base + TRCSSCSRn(i));
> +		writel_relaxed(state->trcsspcicr[i],
> +					drvdata->base + TRCSSPCICRn(i));
> +	}
> +
> +	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> +		writel_relaxed(state->trcacvr[i],
> +					drvdata->base + TRCACVRn(i));
> +		writel_relaxed(state->trcacatr[i],
> +					drvdata->base + TRCACATRn(i));
> +	}
> +
> +	for (i = 0; i < drvdata->numcidc; i++)
> +		writel_relaxed(state->trccidcvr[i],
> +					drvdata->base + TRCCIDCVRn(i));
> +
> +	for (i = 0; i < drvdata->numvmidc; i++)
> +		writel_relaxed(state->trcvmidcvr[i],
> +					drvdata->base + TRCVMIDCVRn(i));
> +
> +	writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0);
> +	writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1);
> +
> +	writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0);
> +	writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR1);
> +
> +	writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
> +
> +	/* As recommended by 4.3.7 of ARM IHI 0064D */
> +	dsb(sy);
> +	isb();
> +
> +	etm4_os_unlock(drvdata);

Same comment as above.

> +	CS_LOCK(drvdata->base);
> +}
> +
> +static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
> +			      void *v)
> +{
> +	struct etmv4_drvdata *drvdata = container_of(nb,
> +					struct etmv4_drvdata, nb);
> +
> +	if (disable_pm_save)
> +		return NOTIFY_OK;
> +
> +	switch (cmd) {
> +	case CPU_PM_ENTER:
> +		/* save the state if coresight is in use */
> +		if (coresight_is_claimed_any(drvdata->base))

claimed_any()? At this point if coresight_is_claimed_self_hosted() == false an
external agent is competing with the framework and we should abdicate.

> +			etm4_cpu_save(drvdata);
> +		break;
> +	case CPU_PM_EXIT:
> +	case CPU_PM_ENTER_FAILED:
> +		/* trcclaimset is set when there is state to restore */
> +		if (drvdata->save_state.trcclaimset)
> +			etm4_cpu_restore(drvdata);
> +		break;
> +	default:
> +		return NOTIFY_DONE;
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +
> +static int etm4_cpu_pm_register(struct etmv4_drvdata *drvdata)
> +{
> +	drvdata->nb.notifier_call = etm4_cpu_pm_notify;
> +	return cpu_pm_register_notifier(&drvdata->nb);
> +}
> +#else
> +static int etm4_cpu_pm_register(struct etmv4_drvdata *drvdata) { return 0; }
> +#endif
> +
>  static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
>  {
>  	int ret;
> @@ -1141,6 +1382,10 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
>  	etm4_init_trace_id(drvdata);
>  	etm4_set_default(&drvdata->config);
>  
> +	ret = etm4_cpu_pm_register(drvdata);
> +	if (ret)
> +		goto err_arch_supported;
> +
>  	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
>  	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
>  	desc.ops = &etm4_cs_ops;
> diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
> index 52786e9d8926..f4cff447c8a1 100644
> --- a/drivers/hwtracing/coresight/coresight-etm4x.h
> +++ b/drivers/hwtracing/coresight/coresight-etm4x.h
> @@ -174,7 +174,8 @@
>  					 ETM_MODE_EXCL_KERN | \
>  					 ETM_MODE_EXCL_USER)
>  
> -#define TRCSTATR_IDLE_BIT		0
> +#define TRCSTATR_IDLE_BIT		BIT(0)
> +#define TRCSTATR_PMSTABLE_BIT		BIT(1)
>  #define ETM_DEFAULT_ADDR_COMP		0
>  
>  /* PowerDown Control Register bits */
> @@ -281,6 +282,65 @@ struct etmv4_config {
>  	u32				ext_inp;
>  };
>  
> +/**
> + * struct etm4_save_state - state to be preserved when ETM is without power
> + */
> +struct etmv4_save_state {
> +	u32	trcprgctlr;
> +	u32	trcprocselr;
> +	u32	trcconfigr;
> +	u32	trcauxctlr;
> +	u32	trceventctl0r;
> +	u32	trceventctl1r;
> +	u32	trcstallctlr;
> +	u32	trctsctlr;
> +	u32	trcsyncpr;
> +	u32	trcccctlr;
> +	u32	trcbbctlr;
> +	u32	trctraceidr;
> +	u32	trcqctlr;
> +
> +	u32	trcvictlr;
> +	u32	trcviiectlr;
> +	u32	trcvissctlr;
> +	u32	trcvipcssctlr;
> +	u32	trcvdctlr;
> +	u32	trcvdsacctlr;
> +	u32	trcvdarcctlr;
> +
> +	u32	trcseqevr[ETM_MAX_SEQ_STATES];
> +	u32	trcseqrstevr;
> +	u32	trcseqstr;
> +	u32	trcextinselr;
> +	u32	trccntrldvr[ETMv4_MAX_CNTR];
> +	u32	trccntctlr[ETMv4_MAX_CNTR];
> +	u32	trccntvr[ETMv4_MAX_CNTR];
> +
> +	u32	trcrsctlr[ETM_MAX_RES_SEL * 2];
> +
> +	u32	trcssccr[ETM_MAX_SS_CMP];
> +	u32	trcsscsr[ETM_MAX_SS_CMP];
> +	u32	trcsspcicr[ETM_MAX_SS_CMP];
> +
> +	u64	trcacvr[ETM_MAX_SINGLE_ADDR_CMP];
> +	u64	trcacatr[ETM_MAX_SINGLE_ADDR_CMP];
> +	u64	trcdvcvr[ETM_MAX_DATA_VAL_CMP];
> +	u64	trcdvcmr[ETM_MAX_DATA_VAL_CMP];
> +	u64	trccidcvr[ETMv4_MAX_CTXID_CMP];
> +	u32	trcvmidcvr[ETM_MAX_VMID_CMP];
> +	u32	trccidcctlr0;
> +	u32	trccidcctlr1;
> +	u32	trcvmidcctlr0;
> +	u32	trcvmidcctlr1;
> +
> +	u32	trcclaimset;
> +
> +	u32	cntr_val[ETMv4_MAX_CNTR];
> +	u32	seq_state;
> +	u32	vinst_ctrl;
> +	u32	ss_status[ETM_MAX_SS_CMP];
> +};
> +
>  /**
>   * struct etm4_drvdata - specifics associated to an ETM component
>   * @base:       Memory mapped base address for this component.
> @@ -337,6 +397,8 @@ struct etmv4_config {
>   * @atbtrig:	If the implementation can support ATB triggers
>   * @lpoverride:	If the implementation can support low-power state over.
>   * @config:	structure holding configuration parameters.
> + * @save_state:	State to be preserved across power loss
> + * @nb:		CPU PM notifier
>   */
>  struct etmv4_drvdata {
>  	void __iomem			*base;
> @@ -383,6 +445,8 @@ struct etmv4_drvdata {
>  	bool				atbtrig;
>  	bool				lpoverride;
>  	struct etmv4_config		config;
> +	struct etmv4_save_state		save_state;
> +	struct notifier_block		nb;
>  };
>  
>  /* Address comparator access types */
> diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
> index 4b130281236a..e85d09e597a0 100644
> --- a/drivers/hwtracing/coresight/coresight.c
> +++ b/drivers/hwtracing/coresight/coresight.c
> @@ -140,7 +140,7 @@ static inline bool coresight_is_claimed_self_hosted(void __iomem *base)
>  	return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED;
>  }
>  
> -static inline bool coresight_is_claimed_any(void __iomem *base)
> +bool coresight_is_claimed_any(void __iomem *base)
>  {
>  	return coresight_read_claim_tags(base) != 0;
>  }
> diff --git a/include/linux/coresight.h b/include/linux/coresight.h
> index 62a520df8add..4f7ba923ffc4 100644
> --- a/include/linux/coresight.h
> +++ b/include/linux/coresight.h
> @@ -268,6 +268,8 @@ extern int coresight_claim_device_unlocked(void __iomem *base);
>  extern void coresight_disclaim_device(void __iomem *base);
>  extern void coresight_disclaim_device_unlocked(void __iomem *base);
>  
> +extern bool coresight_is_claimed_any(void __iomem *base);
> +
>  #else
>  static inline struct coresight_device *
>  coresight_register(struct coresight_desc *desc) { return NULL; }
> @@ -290,6 +292,11 @@ static inline int coresight_claim_device(void __iomem *base)
>  static inline void coresight_disclaim_device(void __iomem *base) {}
>  static inline void coresight_disclaim_device_unlocked(void __iomem *base) {}
>  
> +static inline bool coresight_is_claimed_any(void __iomem *base)
> +{
> +	return false;
> +}
> +

I wanted to test your code but it doesn't apply on the CS next branch:

https://git.linaro.org/kernel/coresight.git/log/?h=next

Thanks,
Mathieu

>  #endif
>  
>  #ifdef CONFIG_OF
> -- 
> 2.21.0
>
Suzuki K Poulose June 19, 2019, 10:38 a.m. UTC | #3
Cc: Al Grant, Mike Leach

Hi Sudeep,

On 18/06/2019 14:21, Sudeep Holla wrote:
> On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
>> Some hardware will ignore bit TRCPDCR.PU which is used to signal
>> to hardware that power should not be removed from the trace unit.
> 
> So, how or can we identify or discover such system ? DT/ACPI ?
> 

I don't think there is a mechanism at the moment to identify such
systems. But if we really need to know this information, we could
always think about it.

>> Let's mitigate against this by saving and restoring the trace
>> unit state when the CPU enters low power states.
>>
> 
> I prefer to do this conditionally. It's unnecessary on systems which
> don't ignore the TRCPDCR.PU and I really don't like them to be penalised
> while we want to add this support for *broken* systems.

It is conditional. i.e, you may disable the operation using a kernel/module
parameter, which I think should be mentioned in the description here.

> 
> This is generally most useful to debug CPU suspend/resume exercising
> the code path completely with emulated CPU power on/off as most of the
> systems have the trace unit and the CPUs in the same power domain.

I understand, which is specifically why this comes with an option to handle
such cases.

> 
> Just curious if this reported on any platforms ?
> 

I have heard people complaining about this, but not sure about the exact
platform(s) affected.

> I wounder if we can use TRCPDSR(Power Down Status Register) to check the
> status. I know on Juno, it doesn't loose context rather the power down
> is emulated and saving/restoring may not be needed at all. Have you
> tested on Juno with and without these patches and seen any difference ?

The problem is trace unit looses power the moment CPU goes to low power mode
and if we try to read this register, it could cause unexpected side-effects.

Al, Mike, Mathieu,

What are your thoughts ?

Cheers
Suzuki
Sudeep Holla June 19, 2019, 11:07 a.m. UTC | #4
On Wed, Jun 19, 2019 at 11:38:12AM +0100, Suzuki K Poulose wrote:
> Cc: Al Grant, Mike Leach
>
> Hi Sudeep,
>
> On 18/06/2019 14:21, Sudeep Holla wrote:
> > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > > Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > > to hardware that power should not be removed from the trace unit.
> >
> > So, how or can we identify or discover such system ? DT/ACPI ?
> >
>
> I don't think there is a mechanism at the moment to identify such
> systems. But if we really need to know this information, we could
> always think about it.
>

I prefer that as we shouldn't systems that are not broken.

> > > Let's mitigate against this by saving and restoring the trace
> > > unit state when the CPU enters low power states.
> > >
> >
> > I prefer to do this conditionally. It's unnecessary on systems which
> > don't ignore the TRCPDCR.PU and I really don't like them to be penalised
> > while we want to add this support for *broken* systems.
>
> It is conditional. i.e, you may disable the operation using a kernel/module
> parameter, which I think should be mentioned in the description here.
>

Why should the user of coresight need to know if the corresponding
hardware module is broken or not. I prefer the firmware tell OS.

> >
> > This is generally most useful to debug CPU suspend/resume exercising
> > the code path completely with emulated CPU power on/off as most of the
> > systems have the trace unit and the CPUs in the same power domain.
>
> I understand, which is specifically why this comes with an option to handle
> such cases.
>

OK

> >
> > Just curious if this reported on any platforms ?
> >
>
> I have heard people complaining about this, but not sure about the exact
> platform(s) affected.
>

One we add mechanism in place, platform need to advertise that it's
broken in firmware(DT/ACPI). Or just have a blacklist if we don't
want to add anything extra to the firmware(DT/ACPI) ?

> > I wounder if we can use TRCPDSR(Power Down Status Register) to check the
> > status. I know on Juno, it doesn't loose context rather the power down
> > is emulated and saving/restoring may not be needed at all. Have you
> > tested on Juno with and without these patches and seen any difference ?
>
> The problem is trace unit looses power the moment CPU goes to low power mode
> and if we try to read this register, it could cause unexpected side-effects.
>

No I meant before CPU loose power i.e. in CPU_ENTER case. However I do
remember you/Andrew mentioning that even that may be bogus on broken
systems, so firmware is only way to avoid penalising all platforms IMO.

Or other option is to stop the coresight tracing session like we do
for PMUs or not entering idle when there's any active coresight session
in progress on such platforms.

--
Regards,
Sudeep
Mathieu Poirier June 19, 2019, 4:22 p.m. UTC | #5
On Wed, 19 Jun 2019 at 05:07, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> On Wed, Jun 19, 2019 at 11:38:12AM +0100, Suzuki K Poulose wrote:
> > Cc: Al Grant, Mike Leach
> >
> > Hi Sudeep,
> >
> > On 18/06/2019 14:21, Sudeep Holla wrote:
> > > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > > > Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > > > to hardware that power should not be removed from the trace unit.
> > >
> > > So, how or can we identify or discover such system ? DT/ACPI ?
> > >
> >
> > I don't think there is a mechanism at the moment to identify such
> > systems. But if we really need to know this information, we could
> > always think about it.
> >
>
> I prefer that as we shouldn't systems that are not broken.
>
> > > > Let's mitigate against this by saving and restoring the trace
> > > > unit state when the CPU enters low power states.
> > > >
> > >
> > > I prefer to do this conditionally. It's unnecessary on systems which
> > > don't ignore the TRCPDCR.PU and I really don't like them to be penalised
> > > while we want to add this support for *broken* systems.
> >
> > It is conditional. i.e, you may disable the operation using a kernel/module
> > parameter, which I think should be mentioned in the description here.
> >
>
> Why should the user of coresight need to know if the corresponding
> hardware module is broken or not. I prefer the firmware tell OS.

I think using ACPI/DT is the best and simplest solution.

>
> > >
> > > This is generally most useful to debug CPU suspend/resume exercising
> > > the code path completely with emulated CPU power on/off as most of the
> > > systems have the trace unit and the CPUs in the same power domain.
> >
> > I understand, which is specifically why this comes with an option to handle
> > such cases.
> >
>
> OK
>
> > >
> > > Just curious if this reported on any platforms ?
> > >
> >
> > I have heard people complaining about this, but not sure about the exact
> > platform(s) affected.

Are you referring to platforms that ignore the TRCPDCR.PU bit?  If so
Juno is the only one that does _not_ ignore it, hence the need to find
another solution.

> >
>
> One we add mechanism in place, platform need to advertise that it's
> broken in firmware(DT/ACPI). Or just have a blacklist if we don't
> want to add anything extra to the firmware(DT/ACPI) ?
>
> > > I wounder if we can use TRCPDSR(Power Down Status Register) to check the
> > > status. I know on Juno, it doesn't loose context rather the power down
> > > is emulated and saving/restoring may not be needed at all. Have you
> > > tested on Juno with and without these patches and seen any difference ?
> >
> > The problem is trace unit looses power the moment CPU goes to low power mode
> > and if we try to read this register, it could cause unexpected side-effects.
> >
>
> No I meant before CPU loose power i.e. in CPU_ENTER case. However I do
> remember you/Andrew mentioning that even that may be bogus on broken
> systems, so firmware is only way to avoid penalising all platforms IMO.

I wouldn't assume that anything is working properly.

>
> Or other option is to stop the coresight tracing session like we do
> for PMUs or not entering idle when there's any active coresight session
> in progress on such platforms.
>
> --
> Regards,
> Sudeep
Andrew Murray June 20, 2019, 11:07 a.m. UTC | #6
On Tue, Jun 18, 2019 at 04:55:49PM -0600, Mathieu Poirier wrote:
> On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > to hardware that power should not be removed from the trace unit.
> > Let's mitigate against this by saving and restoring the trace
> > unit state when the CPU enters low power states.
> > 
> > To provide the benefit to both self-hosted and external debuggers
> > we save/restore the entire state which includes etmv4_config data
> > and dynamic data such as inflight counter values, sequencer
> > states, etc.
> > 
> > To reduce CPU suspend/resume latency the state is only saved or
> > restored if coresight is in use as determined by the claimset
> > registers.
> > 
> > To aid debug of CPU suspend/resume a disable_pm_save parameter
> > is provided to disable this feature.
> > 
> > Signed-off-by: Andrew Murray <andrew.murray@arm.com>
> > ---
> >  drivers/hwtracing/coresight/coresight-etm4x.c | 245 ++++++++++++++++++
> >  drivers/hwtracing/coresight/coresight-etm4x.h |  66 ++++-
> >  drivers/hwtracing/coresight/coresight.c       |   2 +-
> >  include/linux/coresight.h                     |   7 +
> >  4 files changed, 318 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
> > index bda90d4cd62b..d27c5e0d9aec 100644
> > --- a/drivers/hwtracing/coresight/coresight-etm4x.c
> > +++ b/drivers/hwtracing/coresight/coresight-etm4x.c
> > @@ -18,6 +18,7 @@
> >  #include <linux/stat.h>
> >  #include <linux/clk.h>
> >  #include <linux/cpu.h>
> > +#include <linux/cpu_pm.h>
> >  #include <linux/coresight.h>
> >  #include <linux/coresight-pmu.h>
> >  #include <linux/pm_wakeup.h>
> > @@ -36,6 +37,9 @@
> >  static int boot_enable;
> >  module_param_named(boot_enable, boot_enable, int, 0444);
> >  
> > +static int disable_pm_save;
> > +module_param_named(disable_pm_save, disable_pm_save, int, 0444);
> > +
> >  /* The number of ETMv4 currently registered */
> >  static int etm4_count;
> >  static struct etmv4_drvdata *etmdrvdata[NR_CPUS];
> > @@ -53,6 +57,14 @@ static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
> >  	isb();
> >  }
> >  
> > +static void etm4_os_lock(struct etmv4_drvdata *drvdata)
> > +{
> > +	/* Writing 0x1 to TRCOSLAR unlocks the trace registers */
> > +	writel_relaxed(0x1, drvdata->base + TRCOSLAR);
> > +	drvdata->os_unlock = false;
> > +	isb();
> > +}
> > +
> >  static bool etm4_arch_supported(u8 arch)
> >  {
> >  	/* Mask out the minor version number */
> > @@ -1076,6 +1088,235 @@ static void etm4_init_trace_id(struct etmv4_drvdata *drvdata)
> >  	drvdata->trcid = coresight_get_trace_id(drvdata->cpu);
> >  }
> >  
> > +#ifdef CONFIG_CPU_PM
> > +static void etm4_cpu_save(struct etmv4_drvdata *drvdata)
> > +{
> > +	int i;
> > +	u32 control;
> > +	struct etmv4_save_state *state;
> 
> Before going any further I would make sure the CPU this is running on it equal
> to drvdata->cpu.  Otherwise something very wrong happened.
> 

Sure I'll add that.

> > +
> > +	/* As recommended by 3.4.1 of ARM IHI 0064D */
> > +	dsb(sy);
> > +	isb();
> > +
> > +	CS_UNLOCK(drvdata->base);
> > +	etm4_os_lock(drvdata);
> 
> Please add a comment to explain that you are using the OS lock to disable
> external debugger access to the trace registers while the unit is powered down.
> Otherwise people will get confused and will submit patches that changes
> etm4_os_lock() to etm4_os_unlock().

Yes sure, it deserves a comment.

> 
> > +
> > +	/* wait for TRCSTATR.PMSTABLE to go up */
> > +	if (coresight_timeout(drvdata->base, TRCSTATR,
> > +					TRCSTATR_PMSTABLE_BIT, 1))
> > +		dev_err(drvdata->dev,
> > +			"timeout while waiting for Idle Trace Status\n");
> 
> The above comment is not accurate since we are waiting for the PMSTABLE bit.

I'll change that.

> 
> > +
> > +	state = &drvdata->save_state;
> > +
> > +	state->trcprgctlr = readl(drvdata->base + TRCPRGCTLR);
> > +	state->trcprocselr = readl(drvdata->base + TRCPROCSELR);
> > +	state->trcconfigr = readl(drvdata->base + TRCCONFIGR);
> > +	state->trcauxctlr = readl(drvdata->base + TRCAUXCTLR);
> > +	state->trceventctl0r = readl(drvdata->base + TRCEVENTCTL0R);
> > +	state->trceventctl1r = readl(drvdata->base + TRCEVENTCTL1R);
> > +	state->trcstallctlr = readl(drvdata->base + TRCSTALLCTLR);
> > +	state->trctsctlr = readl(drvdata->base + TRCTSCTLR);
> > +	state->trcsyncpr = readl(drvdata->base + TRCSYNCPR);
> > +	state->trcccctlr = readl(drvdata->base + TRCCCCTLR);
> > +	state->trcbbctlr = readl(drvdata->base + TRCBBCTLR);
> > +	state->trctraceidr = readl(drvdata->base + TRCTRACEIDR);
> > +	state->trcqctlr = readl(drvdata->base + TRCQCTLR);
> > +
> > +	state->trcvictlr = readl(drvdata->base + TRCVICTLR);
> > +	state->trcviiectlr = readl(drvdata->base + TRCVIIECTLR);
> > +	state->trcvissctlr = readl(drvdata->base + TRCVISSCTLR);
> > +	state->trcvipcssctlr = readl(drvdata->base + TRCVIPCSSCTLR);
> > +	state->trcvdctlr = readl(drvdata->base + TRCVDCTLR);
> > +	state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR);
> > +	state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR);
> > +
> > +	for (i = 0; i < drvdata->nrseqstate; i++)
> > +		state->trcseqevr[i] = readl(drvdata->base + TRCSEQEVRn(i));
> > +
> > +	state->trcseqrstevr = readl(drvdata->base + TRCSEQRSTEVR);
> > +	state->trcseqstr = readl(drvdata->base + TRCSEQSTR);
> > +	state->trcextinselr = readl(drvdata->base + TRCEXTINSELR);
> > +
> > +	for (i = 0; i < drvdata->nr_cntr; i++) {
> > +		state->trccntrldvr[i] = readl(drvdata->base + TRCCNTRLDVRn(i));
> > +		state->trccntctlr[i] = readl(drvdata->base + TRCCNTCTLRn(i));
> > +		state->trccntvr[i] = readl(drvdata->base + TRCCNTVRn(i));
> > +	}
> > +
> > +	for (i = 0; i < drvdata->nr_resource * 2; i++)
> > +		state->trcrsctlr[i] = readl(drvdata->base + TRCRSCTLRn(i));
> > +
> > +	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
> > +		state->trcssccr[i] = readl(drvdata->base + TRCSSCCRn(i));
> > +		state->trcsscsr[i] = readl(drvdata->base + TRCSSCSRn(i));
> > +		state->trcsspcicr[i] = readl(drvdata->base + TRCSSPCICRn(i));
> > +	}
> > +
> > +	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> > +		state->trcacvr[i] = readl(drvdata->base + TRCACVRn(i));
> > +		state->trcacatr[i] = readl(drvdata->base + TRCACATRn(i));
> > +	}
> > +
> > +	for (i = 0; i < drvdata->numcidc; i++)
> > +		state->trccidcvr[i] = readl(drvdata->base + TRCCIDCVRn(i));
> > +
> > +	for (i = 0; i < drvdata->numvmidc; i++)
> > +		state->trcvmidcvr[i] = readl(drvdata->base + TRCVMIDCVRn(i));
> > +
> > +	state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0);
> > +	state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1);
> > +
> > +	state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0);
> > +	state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR1);
> > +
> > +	state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR);
> > +
> > +	/* wait for TRCSTATR.IDLE to go up */
> > +	if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
> > +		dev_err(drvdata->dev,
> > +			"timeout while waiting for Idle Trace Status\n");
> > +
> > +	/* power can be removed from the trace unit now */
> > +	control = readl_relaxed(drvdata->base + TRCPDCR);
> > +	control &= ~TRCPDCR_PU;
> > +	writel_relaxed(control, drvdata->base + TRCPDCR);
> > +
> > +	CS_LOCK(drvdata->base);
> > +}
> > +
> > +static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
> > +{
> > +	int i;
> > +	struct etmv4_save_state *state;
> > +
> > +	state = &drvdata->save_state;
> 
> Same comment as above about the running CPU.
> 
> > +
> > +	CS_UNLOCK(drvdata->base);
> > +
> > +	writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
> > +
> > +	writel_relaxed(state->trcprgctlr, drvdata->base + TRCPRGCTLR);
> > +	writel_relaxed(state->trcprocselr, drvdata->base + TRCPROCSELR);
> > +	writel_relaxed(state->trcconfigr, drvdata->base + TRCCONFIGR);
> > +	writel_relaxed(state->trcauxctlr, drvdata->base + TRCAUXCTLR);
> > +	writel_relaxed(state->trceventctl0r, drvdata->base + TRCEVENTCTL0R);
> > +	writel_relaxed(state->trceventctl1r, drvdata->base + TRCEVENTCTL1R);
> > +	writel_relaxed(state->trcstallctlr, drvdata->base + TRCSTALLCTLR);
> > +	writel_relaxed(state->trctsctlr, drvdata->base + TRCTSCTLR);
> > +	writel_relaxed(state->trcsyncpr, drvdata->base + TRCSYNCPR);
> > +	writel_relaxed(state->trcccctlr, drvdata->base + TRCCCCTLR);
> > +	writel_relaxed(state->trcbbctlr, drvdata->base + TRCBBCTLR);
> > +	writel_relaxed(state->trctraceidr, drvdata->base + TRCTRACEIDR);
> > +	writel_relaxed(state->trcqctlr, drvdata->base + TRCQCTLR);
> > +
> > +	writel_relaxed(state->trcvictlr, drvdata->base + TRCVICTLR);
> > +	writel_relaxed(state->trcviiectlr, drvdata->base + TRCVIIECTLR);
> > +	writel_relaxed(state->trcvissctlr, drvdata->base + TRCVISSCTLR);
> > +	writel_relaxed(state->trcvipcssctlr, drvdata->base + TRCVIPCSSCTLR);
> > +	writel_relaxed(state->trcvdctlr, drvdata->base + TRCVDCTLR);
> > +	writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR);
> > +	writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR);
> > +
> > +	for (i = 0; i < drvdata->nrseqstate; i++)
> > +		writel_relaxed(state->trcseqevr[i],
> > +					drvdata->base + TRCSEQEVRn(i));
> > +
> > +	writel_relaxed(state->trcseqrstevr, drvdata->base + TRCSEQRSTEVR);
> > +	writel_relaxed(state->trcseqstr, drvdata->base + TRCSEQSTR);
> > +	writel_relaxed(state->trcextinselr, drvdata->base + TRCEXTINSELR);
> > +
> > +	for (i = 0; i < drvdata->nr_cntr; i++) {
> > +		writel_relaxed(state->trccntrldvr[i],
> > +					drvdata->base + TRCCNTRLDVRn(i));
> > +		writel_relaxed(state->trccntctlr[i],
> > +					drvdata->base + TRCCNTCTLRn(i));
> > +		writel_relaxed(state->trccntvr[i],
> > +					drvdata->base + TRCCNTVRn(i));
> > +	}
> > +
> > +	for (i = 0; i < drvdata->nr_resource * 2; i++)
> > +		writel_relaxed(state->trcrsctlr[i],
> > +					drvdata->base + TRCRSCTLRn(i));
> > +
> > +	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
> > +		writel_relaxed(state->trcssccr[i],
> > +					drvdata->base + TRCSSCCRn(i));
> > +		writel_relaxed(state->trcsscsr[i],
> > +					drvdata->base + TRCSSCSRn(i));
> > +		writel_relaxed(state->trcsspcicr[i],
> > +					drvdata->base + TRCSSPCICRn(i));
> > +	}
> > +
> > +	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> > +		writel_relaxed(state->trcacvr[i],
> > +					drvdata->base + TRCACVRn(i));
> > +		writel_relaxed(state->trcacatr[i],
> > +					drvdata->base + TRCACATRn(i));
> > +	}
> > +
> > +	for (i = 0; i < drvdata->numcidc; i++)
> > +		writel_relaxed(state->trccidcvr[i],
> > +					drvdata->base + TRCCIDCVRn(i));
> > +
> > +	for (i = 0; i < drvdata->numvmidc; i++)
> > +		writel_relaxed(state->trcvmidcvr[i],
> > +					drvdata->base + TRCVMIDCVRn(i));
> > +
> > +	writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0);
> > +	writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1);
> > +
> > +	writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0);
> > +	writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR1);
> > +
> > +	writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
> > +
> > +	/* As recommended by 4.3.7 of ARM IHI 0064D */
> > +	dsb(sy);
> > +	isb();
> > +
> > +	etm4_os_unlock(drvdata);
> 
> Same comment as above.
> 
> > +	CS_LOCK(drvdata->base);
> > +}
> > +
> > +static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
> > +			      void *v)
> > +{
> > +	struct etmv4_drvdata *drvdata = container_of(nb,
> > +					struct etmv4_drvdata, nb);
> > +
> > +	if (disable_pm_save)
> > +		return NOTIFY_OK;
> > +
> > +	switch (cmd) {
> > +	case CPU_PM_ENTER:
> > +		/* save the state if coresight is in use */
> > +		if (coresight_is_claimed_any(drvdata->base))
> 
> claimed_any()? At this point if coresight_is_claimed_self_hosted() == false an
> external agent is competing with the framework and we should abdicate.

If we only support save/restore for self-hosted, then we don't actually need
to store as much state as much of it is in the etmv4_config structure.

My thinking here was that if an external agent is being used and we power down
then we'd also potentially suffer the same issue where state is lost. So
saving/restoring may be helpful for external agents as well (or at least
wouldn't do harm)...

However I don't know if this is a real issue.

> 
> > +			etm4_cpu_save(drvdata);
> > +		break;
> > +	case CPU_PM_EXIT:
> > +	case CPU_PM_ENTER_FAILED:
> > +		/* trcclaimset is set when there is state to restore */
> > +		if (drvdata->save_state.trcclaimset)
> > +			etm4_cpu_restore(drvdata);
> > +		break;
> > +	default:
> > +		return NOTIFY_DONE;
> > +	}
> > +
> > +	return NOTIFY_OK;
> > +}
> > +
> > +static int etm4_cpu_pm_register(struct etmv4_drvdata *drvdata)
> > +{
> > +	drvdata->nb.notifier_call = etm4_cpu_pm_notify;
> > +	return cpu_pm_register_notifier(&drvdata->nb);
> > +}
> > +#else
> > +static int etm4_cpu_pm_register(struct etmv4_drvdata *drvdata) { return 0; }
> > +#endif
> > +
> >  static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
> >  {
> >  	int ret;
> > @@ -1141,6 +1382,10 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
> >  	etm4_init_trace_id(drvdata);
> >  	etm4_set_default(&drvdata->config);
> >  
> > +	ret = etm4_cpu_pm_register(drvdata);
> > +	if (ret)
> > +		goto err_arch_supported;
> > +
> >  	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
> >  	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
> >  	desc.ops = &etm4_cs_ops;
> > diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
> > index 52786e9d8926..f4cff447c8a1 100644
> > --- a/drivers/hwtracing/coresight/coresight-etm4x.h
> > +++ b/drivers/hwtracing/coresight/coresight-etm4x.h
> > @@ -174,7 +174,8 @@
> >  					 ETM_MODE_EXCL_KERN | \
> >  					 ETM_MODE_EXCL_USER)
> >  
> > -#define TRCSTATR_IDLE_BIT		0
> > +#define TRCSTATR_IDLE_BIT		BIT(0)
> > +#define TRCSTATR_PMSTABLE_BIT		BIT(1)
> >  #define ETM_DEFAULT_ADDR_COMP		0
> >  
> >  /* PowerDown Control Register bits */
> > @@ -281,6 +282,65 @@ struct etmv4_config {
> >  	u32				ext_inp;
> >  };
> >  
> > +/**
> > + * struct etm4_save_state - state to be preserved when ETM is without power
> > + */
> > +struct etmv4_save_state {
> > +	u32	trcprgctlr;
> > +	u32	trcprocselr;
> > +	u32	trcconfigr;
> > +	u32	trcauxctlr;
> > +	u32	trceventctl0r;
> > +	u32	trceventctl1r;
> > +	u32	trcstallctlr;
> > +	u32	trctsctlr;
> > +	u32	trcsyncpr;
> > +	u32	trcccctlr;
> > +	u32	trcbbctlr;
> > +	u32	trctraceidr;
> > +	u32	trcqctlr;
> > +
> > +	u32	trcvictlr;
> > +	u32	trcviiectlr;
> > +	u32	trcvissctlr;
> > +	u32	trcvipcssctlr;
> > +	u32	trcvdctlr;
> > +	u32	trcvdsacctlr;
> > +	u32	trcvdarcctlr;
> > +
> > +	u32	trcseqevr[ETM_MAX_SEQ_STATES];
> > +	u32	trcseqrstevr;
> > +	u32	trcseqstr;
> > +	u32	trcextinselr;
> > +	u32	trccntrldvr[ETMv4_MAX_CNTR];
> > +	u32	trccntctlr[ETMv4_MAX_CNTR];
> > +	u32	trccntvr[ETMv4_MAX_CNTR];
> > +
> > +	u32	trcrsctlr[ETM_MAX_RES_SEL * 2];
> > +
> > +	u32	trcssccr[ETM_MAX_SS_CMP];
> > +	u32	trcsscsr[ETM_MAX_SS_CMP];
> > +	u32	trcsspcicr[ETM_MAX_SS_CMP];
> > +
> > +	u64	trcacvr[ETM_MAX_SINGLE_ADDR_CMP];
> > +	u64	trcacatr[ETM_MAX_SINGLE_ADDR_CMP];
> > +	u64	trcdvcvr[ETM_MAX_DATA_VAL_CMP];
> > +	u64	trcdvcmr[ETM_MAX_DATA_VAL_CMP];
> > +	u64	trccidcvr[ETMv4_MAX_CTXID_CMP];
> > +	u32	trcvmidcvr[ETM_MAX_VMID_CMP];
> > +	u32	trccidcctlr0;
> > +	u32	trccidcctlr1;
> > +	u32	trcvmidcctlr0;
> > +	u32	trcvmidcctlr1;
> > +
> > +	u32	trcclaimset;
> > +
> > +	u32	cntr_val[ETMv4_MAX_CNTR];
> > +	u32	seq_state;
> > +	u32	vinst_ctrl;
> > +	u32	ss_status[ETM_MAX_SS_CMP];
> > +};
> > +
> >  /**
> >   * struct etm4_drvdata - specifics associated to an ETM component
> >   * @base:       Memory mapped base address for this component.
> > @@ -337,6 +397,8 @@ struct etmv4_config {
> >   * @atbtrig:	If the implementation can support ATB triggers
> >   * @lpoverride:	If the implementation can support low-power state over.
> >   * @config:	structure holding configuration parameters.
> > + * @save_state:	State to be preserved across power loss
> > + * @nb:		CPU PM notifier
> >   */
> >  struct etmv4_drvdata {
> >  	void __iomem			*base;
> > @@ -383,6 +445,8 @@ struct etmv4_drvdata {
> >  	bool				atbtrig;
> >  	bool				lpoverride;
> >  	struct etmv4_config		config;
> > +	struct etmv4_save_state		save_state;
> > +	struct notifier_block		nb;
> >  };
> >  
> >  /* Address comparator access types */
> > diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
> > index 4b130281236a..e85d09e597a0 100644
> > --- a/drivers/hwtracing/coresight/coresight.c
> > +++ b/drivers/hwtracing/coresight/coresight.c
> > @@ -140,7 +140,7 @@ static inline bool coresight_is_claimed_self_hosted(void __iomem *base)
> >  	return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED;
> >  }
> >  
> > -static inline bool coresight_is_claimed_any(void __iomem *base)
> > +bool coresight_is_claimed_any(void __iomem *base)
> >  {
> >  	return coresight_read_claim_tags(base) != 0;
> >  }
> > diff --git a/include/linux/coresight.h b/include/linux/coresight.h
> > index 62a520df8add..4f7ba923ffc4 100644
> > --- a/include/linux/coresight.h
> > +++ b/include/linux/coresight.h
> > @@ -268,6 +268,8 @@ extern int coresight_claim_device_unlocked(void __iomem *base);
> >  extern void coresight_disclaim_device(void __iomem *base);
> >  extern void coresight_disclaim_device_unlocked(void __iomem *base);
> >  
> > +extern bool coresight_is_claimed_any(void __iomem *base);
> > +
> >  #else
> >  static inline struct coresight_device *
> >  coresight_register(struct coresight_desc *desc) { return NULL; }
> > @@ -290,6 +292,11 @@ static inline int coresight_claim_device(void __iomem *base)
> >  static inline void coresight_disclaim_device(void __iomem *base) {}
> >  static inline void coresight_disclaim_device_unlocked(void __iomem *base) {}
> >  
> > +static inline bool coresight_is_claimed_any(void __iomem *base)
> > +{
> > +	return false;
> > +}
> > +
> 
> I wanted to test your code but it doesn't apply on the CS next branch:
> 
> https://git.linaro.org/kernel/coresight.git/log/?h=next

Oh sorry about that, this was ontop of v5.2-rc5, I'll rebase to the CS branch
on the next iteration.

Thanks for the responsive feedback.

Andrew Murray

> 
> Thanks,
> Mathieu
> 
> >  #endif
> >  
> >  #ifdef CONFIG_OF
> > -- 
> > 2.21.0
> >
Andrew Murray June 20, 2019, 11:41 a.m. UTC | #7
On Wed, Jun 19, 2019 at 10:22:58AM -0600, Mathieu Poirier wrote:
> On Wed, 19 Jun 2019 at 05:07, Sudeep Holla <sudeep.holla@arm.com> wrote:
> >
> > On Wed, Jun 19, 2019 at 11:38:12AM +0100, Suzuki K Poulose wrote:
> > > Cc: Al Grant, Mike Leach
> > >
> > > Hi Sudeep,
> > >
> > > On 18/06/2019 14:21, Sudeep Holla wrote:
> > > > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > > > > Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > > > > to hardware that power should not be removed from the trace unit.
> > > >
> > > > So, how or can we identify or discover such system ? DT/ACPI ?
> > > >
> > >
> > > I don't think there is a mechanism at the moment to identify such
> > > systems. But if we really need to know this information, we could
> > > always think about it.
> > >
> >
> > I prefer that as we shouldn't systems that are not broken.
> >
> > > > > Let's mitigate against this by saving and restoring the trace
> > > > > unit state when the CPU enters low power states.
> > > > >
> > > >
> > > > I prefer to do this conditionally. It's unnecessary on systems which
> > > > don't ignore the TRCPDCR.PU and I really don't like them to be penalised
> > > > while we want to add this support for *broken* systems.
> > >
> > > It is conditional. i.e, you may disable the operation using a kernel/module
> > > parameter, which I think should be mentioned in the description here.
> > >
> >
> > Why should the user of coresight need to know if the corresponding
> > hardware module is broken or not. I prefer the firmware tell OS.
> 
> I think using ACPI/DT is the best and simplest solution.

I certainly agree that it feels wrong to have a default level of support
which is targeted at broken systems. However the penalty (latency) for doing so
doesn't seem high - seeing as this only effects users that are actively using
coresight (I assume self hosted mode is only used as a debug tool, rather than to
obtain metrics during normal use?).

Adding some broken tag in ACPI/DT seems like a good solution - assuming it will
get adopted and used in systems. The existing "disable_pm_save" module option
can be renamed to "enable_pm_save" for those that have less control of their
firmware.

Unless of course we think it's unlikely we'll ever see hardware that isn't
broken - I don't have enough knowledge of how likely or not this is.

Another solution might be to enable save/restore by default (as it is now),
and then on resume we read the hardware registers to determine if state was
lost. If it wasn't lost then we can disable the save/restore feature. (Though
is it possible for systems to be partly broken, e.g. working for some CPUs
but not others?). With this approach on good systems you only get penalised
once.

> 
> >
> > > >
> > > > This is generally most useful to debug CPU suspend/resume exercising
> > > > the code path completely with emulated CPU power on/off as most of the
> > > > systems have the trace unit and the CPUs in the same power domain.
> > >
> > > I understand, which is specifically why this comes with an option to handle
> > > such cases.
> > >
> >
> > OK

I'll update the cover letter and commit messages to reflect that this
option is present. (And likewise for conditionally saving/restoring the
registers only if coresight is in use).

> >
> > > >
> > > > Just curious if this reported on any platforms ?
> > > >
> > >
> > > I have heard people complaining about this, but not sure about the exact
> > > platform(s) affected.
> 
> Are you referring to platforms that ignore the TRCPDCR.PU bit?  If so
> Juno is the only one that does _not_ ignore it, hence the need to find
> another solution.
> 
> > >
> >
> > One we add mechanism in place, platform need to advertise that it's
> > broken in firmware(DT/ACPI). Or just have a blacklist if we don't
> > want to add anything extra to the firmware(DT/ACPI) ?
> >
> > > > I wounder if we can use TRCPDSR(Power Down Status Register) to check the
> > > > status. I know on Juno, it doesn't loose context rather the power down
> > > > is emulated and saving/restoring may not be needed at all. Have you
> > > > tested on Juno with and without these patches and seen any difference ?
> > >
> > > The problem is trace unit looses power the moment CPU goes to low power mode
> > > and if we try to read this register, it could cause unexpected side-effects.
> > >
> >
> > No I meant before CPU loose power i.e. in CPU_ENTER case. However I do
> > remember you/Andrew mentioning that even that may be bogus on broken
> > systems, so firmware is only way to avoid penalising all platforms IMO.
> 
> I wouldn't assume that anything is working properly.

Thanks,

Andrew Murray

> 
> >
> > Or other option is to stop the coresight tracing session like we do
> > for PMUs or not entering idle when there's any active coresight session
> > in progress on such platforms.
> >
> > --
> > Regards,
> > Sudeep
Mathieu Poirier June 20, 2019, 2:49 p.m. UTC | #8
Hi Andrew,

On Thu, 20 Jun 2019 at 05:07, Andrew Murray <andrew.murray@arm.com> wrote:
>
> On Tue, Jun 18, 2019 at 04:55:49PM -0600, Mathieu Poirier wrote:
> > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > > Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > > to hardware that power should not be removed from the trace unit.
> > > Let's mitigate against this by saving and restoring the trace
> > > unit state when the CPU enters low power states.
> > >
> > > To provide the benefit to both self-hosted and external debuggers
> > > we save/restore the entire state which includes etmv4_config data
> > > and dynamic data such as inflight counter values, sequencer
> > > states, etc.
> > >
> > > To reduce CPU suspend/resume latency the state is only saved or
> > > restored if coresight is in use as determined by the claimset
> > > registers.
> > >
> > > To aid debug of CPU suspend/resume a disable_pm_save parameter
> > > is provided to disable this feature.
> > >
> > > Signed-off-by: Andrew Murray <andrew.murray@arm.com>
> > > ---
> > >  drivers/hwtracing/coresight/coresight-etm4x.c | 245 ++++++++++++++++++
> > >  drivers/hwtracing/coresight/coresight-etm4x.h |  66 ++++-
> > >  drivers/hwtracing/coresight/coresight.c       |   2 +-
> > >  include/linux/coresight.h                     |   7 +
> > >  4 files changed, 318 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
> > > index bda90d4cd62b..d27c5e0d9aec 100644
> > > --- a/drivers/hwtracing/coresight/coresight-etm4x.c
> > > +++ b/drivers/hwtracing/coresight/coresight-etm4x.c
> > > @@ -18,6 +18,7 @@
> > >  #include <linux/stat.h>
> > >  #include <linux/clk.h>
> > >  #include <linux/cpu.h>
> > > +#include <linux/cpu_pm.h>
> > >  #include <linux/coresight.h>
> > >  #include <linux/coresight-pmu.h>
> > >  #include <linux/pm_wakeup.h>
> > > @@ -36,6 +37,9 @@
> > >  static int boot_enable;
> > >  module_param_named(boot_enable, boot_enable, int, 0444);
> > >
> > > +static int disable_pm_save;
> > > +module_param_named(disable_pm_save, disable_pm_save, int, 0444);
> > > +
> > >  /* The number of ETMv4 currently registered */
> > >  static int etm4_count;
> > >  static struct etmv4_drvdata *etmdrvdata[NR_CPUS];
> > > @@ -53,6 +57,14 @@ static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
> > >     isb();
> > >  }
> > >
> > > +static void etm4_os_lock(struct etmv4_drvdata *drvdata)
> > > +{
> > > +   /* Writing 0x1 to TRCOSLAR unlocks the trace registers */
> > > +   writel_relaxed(0x1, drvdata->base + TRCOSLAR);
> > > +   drvdata->os_unlock = false;
> > > +   isb();
> > > +}
> > > +
> > >  static bool etm4_arch_supported(u8 arch)
> > >  {
> > >     /* Mask out the minor version number */
> > > @@ -1076,6 +1088,235 @@ static void etm4_init_trace_id(struct etmv4_drvdata *drvdata)
> > >     drvdata->trcid = coresight_get_trace_id(drvdata->cpu);
> > >  }
> > >
> > > +#ifdef CONFIG_CPU_PM
> > > +static void etm4_cpu_save(struct etmv4_drvdata *drvdata)
> > > +{
> > > +   int i;
> > > +   u32 control;
> > > +   struct etmv4_save_state *state;
> >
> > Before going any further I would make sure the CPU this is running on it equal
> > to drvdata->cpu.  Otherwise something very wrong happened.
> >
>
> Sure I'll add that.
>
> > > +
> > > +   /* As recommended by 3.4.1 of ARM IHI 0064D */
> > > +   dsb(sy);
> > > +   isb();
> > > +
> > > +   CS_UNLOCK(drvdata->base);
> > > +   etm4_os_lock(drvdata);
> >
> > Please add a comment to explain that you are using the OS lock to disable
> > external debugger access to the trace registers while the unit is powered down.
> > Otherwise people will get confused and will submit patches that changes
> > etm4_os_lock() to etm4_os_unlock().
>
> Yes sure, it deserves a comment.
>
> >
> > > +
> > > +   /* wait for TRCSTATR.PMSTABLE to go up */
> > > +   if (coresight_timeout(drvdata->base, TRCSTATR,
> > > +                                   TRCSTATR_PMSTABLE_BIT, 1))
> > > +           dev_err(drvdata->dev,
> > > +                   "timeout while waiting for Idle Trace Status\n");
> >
> > The above comment is not accurate since we are waiting for the PMSTABLE bit.
>
> I'll change that.
>
> >
> > > +
> > > +   state = &drvdata->save_state;
> > > +
> > > +   state->trcprgctlr = readl(drvdata->base + TRCPRGCTLR);
> > > +   state->trcprocselr = readl(drvdata->base + TRCPROCSELR);
> > > +   state->trcconfigr = readl(drvdata->base + TRCCONFIGR);
> > > +   state->trcauxctlr = readl(drvdata->base + TRCAUXCTLR);
> > > +   state->trceventctl0r = readl(drvdata->base + TRCEVENTCTL0R);
> > > +   state->trceventctl1r = readl(drvdata->base + TRCEVENTCTL1R);
> > > +   state->trcstallctlr = readl(drvdata->base + TRCSTALLCTLR);
> > > +   state->trctsctlr = readl(drvdata->base + TRCTSCTLR);
> > > +   state->trcsyncpr = readl(drvdata->base + TRCSYNCPR);
> > > +   state->trcccctlr = readl(drvdata->base + TRCCCCTLR);
> > > +   state->trcbbctlr = readl(drvdata->base + TRCBBCTLR);
> > > +   state->trctraceidr = readl(drvdata->base + TRCTRACEIDR);
> > > +   state->trcqctlr = readl(drvdata->base + TRCQCTLR);
> > > +
> > > +   state->trcvictlr = readl(drvdata->base + TRCVICTLR);
> > > +   state->trcviiectlr = readl(drvdata->base + TRCVIIECTLR);
> > > +   state->trcvissctlr = readl(drvdata->base + TRCVISSCTLR);
> > > +   state->trcvipcssctlr = readl(drvdata->base + TRCVIPCSSCTLR);
> > > +   state->trcvdctlr = readl(drvdata->base + TRCVDCTLR);
> > > +   state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR);
> > > +   state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR);
> > > +
> > > +   for (i = 0; i < drvdata->nrseqstate; i++)
> > > +           state->trcseqevr[i] = readl(drvdata->base + TRCSEQEVRn(i));
> > > +
> > > +   state->trcseqrstevr = readl(drvdata->base + TRCSEQRSTEVR);
> > > +   state->trcseqstr = readl(drvdata->base + TRCSEQSTR);
> > > +   state->trcextinselr = readl(drvdata->base + TRCEXTINSELR);
> > > +
> > > +   for (i = 0; i < drvdata->nr_cntr; i++) {
> > > +           state->trccntrldvr[i] = readl(drvdata->base + TRCCNTRLDVRn(i));
> > > +           state->trccntctlr[i] = readl(drvdata->base + TRCCNTCTLRn(i));
> > > +           state->trccntvr[i] = readl(drvdata->base + TRCCNTVRn(i));
> > > +   }
> > > +
> > > +   for (i = 0; i < drvdata->nr_resource * 2; i++)
> > > +           state->trcrsctlr[i] = readl(drvdata->base + TRCRSCTLRn(i));
> > > +
> > > +   for (i = 0; i < drvdata->nr_ss_cmp; i++) {
> > > +           state->trcssccr[i] = readl(drvdata->base + TRCSSCCRn(i));
> > > +           state->trcsscsr[i] = readl(drvdata->base + TRCSSCSRn(i));
> > > +           state->trcsspcicr[i] = readl(drvdata->base + TRCSSPCICRn(i));
> > > +   }
> > > +
> > > +   for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> > > +           state->trcacvr[i] = readl(drvdata->base + TRCACVRn(i));
> > > +           state->trcacatr[i] = readl(drvdata->base + TRCACATRn(i));
> > > +   }
> > > +
> > > +   for (i = 0; i < drvdata->numcidc; i++)
> > > +           state->trccidcvr[i] = readl(drvdata->base + TRCCIDCVRn(i));
> > > +
> > > +   for (i = 0; i < drvdata->numvmidc; i++)
> > > +           state->trcvmidcvr[i] = readl(drvdata->base + TRCVMIDCVRn(i));
> > > +
> > > +   state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0);
> > > +   state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1);
> > > +
> > > +   state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0);
> > > +   state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR1);
> > > +
> > > +   state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR);
> > > +
> > > +   /* wait for TRCSTATR.IDLE to go up */
> > > +   if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
> > > +           dev_err(drvdata->dev,
> > > +                   "timeout while waiting for Idle Trace Status\n");
> > > +
> > > +   /* power can be removed from the trace unit now */
> > > +   control = readl_relaxed(drvdata->base + TRCPDCR);
> > > +   control &= ~TRCPDCR_PU;
> > > +   writel_relaxed(control, drvdata->base + TRCPDCR);
> > > +
> > > +   CS_LOCK(drvdata->base);
> > > +}
> > > +
> > > +static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
> > > +{
> > > +   int i;
> > > +   struct etmv4_save_state *state;
> > > +
> > > +   state = &drvdata->save_state;
> >
> > Same comment as above about the running CPU.
> >
> > > +
> > > +   CS_UNLOCK(drvdata->base);
> > > +
> > > +   writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
> > > +
> > > +   writel_relaxed(state->trcprgctlr, drvdata->base + TRCPRGCTLR);
> > > +   writel_relaxed(state->trcprocselr, drvdata->base + TRCPROCSELR);
> > > +   writel_relaxed(state->trcconfigr, drvdata->base + TRCCONFIGR);
> > > +   writel_relaxed(state->trcauxctlr, drvdata->base + TRCAUXCTLR);
> > > +   writel_relaxed(state->trceventctl0r, drvdata->base + TRCEVENTCTL0R);
> > > +   writel_relaxed(state->trceventctl1r, drvdata->base + TRCEVENTCTL1R);
> > > +   writel_relaxed(state->trcstallctlr, drvdata->base + TRCSTALLCTLR);
> > > +   writel_relaxed(state->trctsctlr, drvdata->base + TRCTSCTLR);
> > > +   writel_relaxed(state->trcsyncpr, drvdata->base + TRCSYNCPR);
> > > +   writel_relaxed(state->trcccctlr, drvdata->base + TRCCCCTLR);
> > > +   writel_relaxed(state->trcbbctlr, drvdata->base + TRCBBCTLR);
> > > +   writel_relaxed(state->trctraceidr, drvdata->base + TRCTRACEIDR);
> > > +   writel_relaxed(state->trcqctlr, drvdata->base + TRCQCTLR);
> > > +
> > > +   writel_relaxed(state->trcvictlr, drvdata->base + TRCVICTLR);
> > > +   writel_relaxed(state->trcviiectlr, drvdata->base + TRCVIIECTLR);
> > > +   writel_relaxed(state->trcvissctlr, drvdata->base + TRCVISSCTLR);
> > > +   writel_relaxed(state->trcvipcssctlr, drvdata->base + TRCVIPCSSCTLR);
> > > +   writel_relaxed(state->trcvdctlr, drvdata->base + TRCVDCTLR);
> > > +   writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR);
> > > +   writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR);
> > > +
> > > +   for (i = 0; i < drvdata->nrseqstate; i++)
> > > +           writel_relaxed(state->trcseqevr[i],
> > > +                                   drvdata->base + TRCSEQEVRn(i));
> > > +
> > > +   writel_relaxed(state->trcseqrstevr, drvdata->base + TRCSEQRSTEVR);
> > > +   writel_relaxed(state->trcseqstr, drvdata->base + TRCSEQSTR);
> > > +   writel_relaxed(state->trcextinselr, drvdata->base + TRCEXTINSELR);
> > > +
> > > +   for (i = 0; i < drvdata->nr_cntr; i++) {
> > > +           writel_relaxed(state->trccntrldvr[i],
> > > +                                   drvdata->base + TRCCNTRLDVRn(i));
> > > +           writel_relaxed(state->trccntctlr[i],
> > > +                                   drvdata->base + TRCCNTCTLRn(i));
> > > +           writel_relaxed(state->trccntvr[i],
> > > +                                   drvdata->base + TRCCNTVRn(i));
> > > +   }
> > > +
> > > +   for (i = 0; i < drvdata->nr_resource * 2; i++)
> > > +           writel_relaxed(state->trcrsctlr[i],
> > > +                                   drvdata->base + TRCRSCTLRn(i));
> > > +
> > > +   for (i = 0; i < drvdata->nr_ss_cmp; i++) {
> > > +           writel_relaxed(state->trcssccr[i],
> > > +                                   drvdata->base + TRCSSCCRn(i));
> > > +           writel_relaxed(state->trcsscsr[i],
> > > +                                   drvdata->base + TRCSSCSRn(i));
> > > +           writel_relaxed(state->trcsspcicr[i],
> > > +                                   drvdata->base + TRCSSPCICRn(i));
> > > +   }
> > > +
> > > +   for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> > > +           writel_relaxed(state->trcacvr[i],
> > > +                                   drvdata->base + TRCACVRn(i));
> > > +           writel_relaxed(state->trcacatr[i],
> > > +                                   drvdata->base + TRCACATRn(i));
> > > +   }
> > > +
> > > +   for (i = 0; i < drvdata->numcidc; i++)
> > > +           writel_relaxed(state->trccidcvr[i],
> > > +                                   drvdata->base + TRCCIDCVRn(i));
> > > +
> > > +   for (i = 0; i < drvdata->numvmidc; i++)
> > > +           writel_relaxed(state->trcvmidcvr[i],
> > > +                                   drvdata->base + TRCVMIDCVRn(i));
> > > +
> > > +   writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0);
> > > +   writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1);
> > > +
> > > +   writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0);
> > > +   writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR1);
> > > +
> > > +   writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
> > > +
> > > +   /* As recommended by 4.3.7 of ARM IHI 0064D */
> > > +   dsb(sy);
> > > +   isb();
> > > +
> > > +   etm4_os_unlock(drvdata);
> >
> > Same comment as above.
> >
> > > +   CS_LOCK(drvdata->base);
> > > +}
> > > +
> > > +static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
> > > +                         void *v)
> > > +{
> > > +   struct etmv4_drvdata *drvdata = container_of(nb,
> > > +                                   struct etmv4_drvdata, nb);
> > > +
> > > +   if (disable_pm_save)
> > > +           return NOTIFY_OK;
> > > +
> > > +   switch (cmd) {
> > > +   case CPU_PM_ENTER:
> > > +           /* save the state if coresight is in use */
> > > +           if (coresight_is_claimed_any(drvdata->base))
> >
> > claimed_any()? At this point if coresight_is_claimed_self_hosted() == false an
> > external agent is competing with the framework and we should abdicate.
>
> If we only support save/restore for self-hosted, then we don't actually need
> to store as much state as much of it is in the etmv4_config structure.
>
> My thinking here was that if an external agent is being used and we power down
> then we'd also potentially suffer the same issue where state is lost. So
> saving/restoring may be helpful for external agents as well (or at least
> wouldn't do harm)...

There is no point in trying to cover cases where external agents are
involved - we will always get it wrong.  And the notifiers shouldn't
return immediately if a tracer is not being used.

>
> However I don't know if this is a real issue.
>
> >
> > > +                   etm4_cpu_save(drvdata);
> > > +           break;
> > > +   case CPU_PM_EXIT:
> > > +   case CPU_PM_ENTER_FAILED:
> > > +           /* trcclaimset is set when there is state to restore */
> > > +           if (drvdata->save_state.trcclaimset)
> > > +                   etm4_cpu_restore(drvdata);
> > > +           break;
> > > +   default:
> > > +           return NOTIFY_DONE;
> > > +   }
> > > +
> > > +   return NOTIFY_OK;
> > > +}
> > > +
> > > +static int etm4_cpu_pm_register(struct etmv4_drvdata *drvdata)
> > > +{
> > > +   drvdata->nb.notifier_call = etm4_cpu_pm_notify;
> > > +   return cpu_pm_register_notifier(&drvdata->nb);
> > > +}
> > > +#else
> > > +static int etm4_cpu_pm_register(struct etmv4_drvdata *drvdata) { return 0; }
> > > +#endif
> > > +
> > >  static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
> > >  {
> > >     int ret;
> > > @@ -1141,6 +1382,10 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
> > >     etm4_init_trace_id(drvdata);
> > >     etm4_set_default(&drvdata->config);
> > >
> > > +   ret = etm4_cpu_pm_register(drvdata);
> > > +   if (ret)
> > > +           goto err_arch_supported;
> > > +
> > >     desc.type = CORESIGHT_DEV_TYPE_SOURCE;
> > >     desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
> > >     desc.ops = &etm4_cs_ops;
> > > diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
> > > index 52786e9d8926..f4cff447c8a1 100644
> > > --- a/drivers/hwtracing/coresight/coresight-etm4x.h
> > > +++ b/drivers/hwtracing/coresight/coresight-etm4x.h
> > > @@ -174,7 +174,8 @@
> > >                                      ETM_MODE_EXCL_KERN | \
> > >                                      ETM_MODE_EXCL_USER)
> > >
> > > -#define TRCSTATR_IDLE_BIT          0
> > > +#define TRCSTATR_IDLE_BIT          BIT(0)
> > > +#define TRCSTATR_PMSTABLE_BIT              BIT(1)
> > >  #define ETM_DEFAULT_ADDR_COMP              0
> > >
> > >  /* PowerDown Control Register bits */
> > > @@ -281,6 +282,65 @@ struct etmv4_config {
> > >     u32                             ext_inp;
> > >  };
> > >
> > > +/**
> > > + * struct etm4_save_state - state to be preserved when ETM is without power
> > > + */
> > > +struct etmv4_save_state {
> > > +   u32     trcprgctlr;
> > > +   u32     trcprocselr;
> > > +   u32     trcconfigr;
> > > +   u32     trcauxctlr;
> > > +   u32     trceventctl0r;
> > > +   u32     trceventctl1r;
> > > +   u32     trcstallctlr;
> > > +   u32     trctsctlr;
> > > +   u32     trcsyncpr;
> > > +   u32     trcccctlr;
> > > +   u32     trcbbctlr;
> > > +   u32     trctraceidr;
> > > +   u32     trcqctlr;
> > > +
> > > +   u32     trcvictlr;
> > > +   u32     trcviiectlr;
> > > +   u32     trcvissctlr;
> > > +   u32     trcvipcssctlr;
> > > +   u32     trcvdctlr;
> > > +   u32     trcvdsacctlr;
> > > +   u32     trcvdarcctlr;
> > > +
> > > +   u32     trcseqevr[ETM_MAX_SEQ_STATES];
> > > +   u32     trcseqrstevr;
> > > +   u32     trcseqstr;
> > > +   u32     trcextinselr;
> > > +   u32     trccntrldvr[ETMv4_MAX_CNTR];
> > > +   u32     trccntctlr[ETMv4_MAX_CNTR];
> > > +   u32     trccntvr[ETMv4_MAX_CNTR];
> > > +
> > > +   u32     trcrsctlr[ETM_MAX_RES_SEL * 2];
> > > +
> > > +   u32     trcssccr[ETM_MAX_SS_CMP];
> > > +   u32     trcsscsr[ETM_MAX_SS_CMP];
> > > +   u32     trcsspcicr[ETM_MAX_SS_CMP];
> > > +
> > > +   u64     trcacvr[ETM_MAX_SINGLE_ADDR_CMP];
> > > +   u64     trcacatr[ETM_MAX_SINGLE_ADDR_CMP];
> > > +   u64     trcdvcvr[ETM_MAX_DATA_VAL_CMP];
> > > +   u64     trcdvcmr[ETM_MAX_DATA_VAL_CMP];
> > > +   u64     trccidcvr[ETMv4_MAX_CTXID_CMP];
> > > +   u32     trcvmidcvr[ETM_MAX_VMID_CMP];
> > > +   u32     trccidcctlr0;
> > > +   u32     trccidcctlr1;
> > > +   u32     trcvmidcctlr0;
> > > +   u32     trcvmidcctlr1;
> > > +
> > > +   u32     trcclaimset;
> > > +
> > > +   u32     cntr_val[ETMv4_MAX_CNTR];
> > > +   u32     seq_state;
> > > +   u32     vinst_ctrl;
> > > +   u32     ss_status[ETM_MAX_SS_CMP];
> > > +};
> > > +
> > >  /**
> > >   * struct etm4_drvdata - specifics associated to an ETM component
> > >   * @base:       Memory mapped base address for this component.
> > > @@ -337,6 +397,8 @@ struct etmv4_config {
> > >   * @atbtrig:       If the implementation can support ATB triggers
> > >   * @lpoverride:    If the implementation can support low-power state over.
> > >   * @config:        structure holding configuration parameters.
> > > + * @save_state:    State to be preserved across power loss
> > > + * @nb:            CPU PM notifier
> > >   */
> > >  struct etmv4_drvdata {
> > >     void __iomem                    *base;
> > > @@ -383,6 +445,8 @@ struct etmv4_drvdata {
> > >     bool                            atbtrig;
> > >     bool                            lpoverride;
> > >     struct etmv4_config             config;
> > > +   struct etmv4_save_state         save_state;
> > > +   struct notifier_block           nb;
> > >  };
> > >
> > >  /* Address comparator access types */
> > > diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
> > > index 4b130281236a..e85d09e597a0 100644
> > > --- a/drivers/hwtracing/coresight/coresight.c
> > > +++ b/drivers/hwtracing/coresight/coresight.c
> > > @@ -140,7 +140,7 @@ static inline bool coresight_is_claimed_self_hosted(void __iomem *base)
> > >     return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED;
> > >  }
> > >
> > > -static inline bool coresight_is_claimed_any(void __iomem *base)
> > > +bool coresight_is_claimed_any(void __iomem *base)
> > >  {
> > >     return coresight_read_claim_tags(base) != 0;
> > >  }
> > > diff --git a/include/linux/coresight.h b/include/linux/coresight.h
> > > index 62a520df8add..4f7ba923ffc4 100644
> > > --- a/include/linux/coresight.h
> > > +++ b/include/linux/coresight.h
> > > @@ -268,6 +268,8 @@ extern int coresight_claim_device_unlocked(void __iomem *base);
> > >  extern void coresight_disclaim_device(void __iomem *base);
> > >  extern void coresight_disclaim_device_unlocked(void __iomem *base);
> > >
> > > +extern bool coresight_is_claimed_any(void __iomem *base);
> > > +
> > >  #else
> > >  static inline struct coresight_device *
> > >  coresight_register(struct coresight_desc *desc) { return NULL; }
> > > @@ -290,6 +292,11 @@ static inline int coresight_claim_device(void __iomem *base)
> > >  static inline void coresight_disclaim_device(void __iomem *base) {}
> > >  static inline void coresight_disclaim_device_unlocked(void __iomem *base) {}
> > >
> > > +static inline bool coresight_is_claimed_any(void __iomem *base)
> > > +{
> > > +   return false;
> > > +}
> > > +
> >
> > I wanted to test your code but it doesn't apply on the CS next branch:
> >
> > https://git.linaro.org/kernel/coresight.git/log/?h=next
>
> Oh sorry about that, this was ontop of v5.2-rc5, I'll rebase to the CS branch
> on the next iteration.
>
> Thanks for the responsive feedback.
>
> Andrew Murray
>
> >
> > Thanks,
> > Mathieu
> >
> > >  #endif
> > >
> > >  #ifdef CONFIG_OF
> > > --
> > > 2.21.0
> > >
Mathieu Poirier June 20, 2019, 2:55 p.m. UTC | #9
On Thu, 20 Jun 2019 at 05:41, Andrew Murray <andrew.murray@arm.com> wrote:
>
> On Wed, Jun 19, 2019 at 10:22:58AM -0600, Mathieu Poirier wrote:
> > On Wed, 19 Jun 2019 at 05:07, Sudeep Holla <sudeep.holla@arm.com> wrote:
> > >
> > > On Wed, Jun 19, 2019 at 11:38:12AM +0100, Suzuki K Poulose wrote:
> > > > Cc: Al Grant, Mike Leach
> > > >
> > > > Hi Sudeep,
> > > >
> > > > On 18/06/2019 14:21, Sudeep Holla wrote:
> > > > > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > > > > > Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > > > > > to hardware that power should not be removed from the trace unit.
> > > > >
> > > > > So, how or can we identify or discover such system ? DT/ACPI ?
> > > > >
> > > >
> > > > I don't think there is a mechanism at the moment to identify such
> > > > systems. But if we really need to know this information, we could
> > > > always think about it.
> > > >
> > >
> > > I prefer that as we shouldn't systems that are not broken.
> > >
> > > > > > Let's mitigate against this by saving and restoring the trace
> > > > > > unit state when the CPU enters low power states.
> > > > > >
> > > > >
> > > > > I prefer to do this conditionally. It's unnecessary on systems which
> > > > > don't ignore the TRCPDCR.PU and I really don't like them to be penalised
> > > > > while we want to add this support for *broken* systems.
> > > >
> > > > It is conditional. i.e, you may disable the operation using a kernel/module
> > > > parameter, which I think should be mentioned in the description here.
> > > >
> > >
> > > Why should the user of coresight need to know if the corresponding
> > > hardware module is broken or not. I prefer the firmware tell OS.
> >
> > I think using ACPI/DT is the best and simplest solution.
>
> I certainly agree that it feels wrong to have a default level of support
> which is targeted at broken systems. However the penalty (latency) for doing so
> doesn't seem high - seeing as this only effects users that are actively using
> coresight (I assume self hosted mode is only used as a debug tool, rather than to
> obtain metrics during normal use?).
>
> Adding some broken tag in ACPI/DT seems like a good solution - assuming it will
> get adopted and used in systems. The existing "disable_pm_save" module option
> can be renamed to "enable_pm_save" for those that have less control of their
> firmware.

"enable_pm_save" would work.

>
> Unless of course we think it's unlikely we'll ever see hardware that isn't
> broken - I don't have enough knowledge of how likely or not this is.

The unlikely part is to see platforms where TRCPDCR.PU is taken into
account.  As I previously indicated Juno is the only one that handles
it correctly.

Thanks,
Mathieu

>
> Another solution might be to enable save/restore by default (as it is now),
> and then on resume we read the hardware registers to determine if state was
> lost. If it wasn't lost then we can disable the save/restore feature. (Though
> is it possible for systems to be partly broken, e.g. working for some CPUs
> but not others?). With this approach on good systems you only get penalised
> once.
>
> >
> > >
> > > > >
> > > > > This is generally most useful to debug CPU suspend/resume exercising
> > > > > the code path completely with emulated CPU power on/off as most of the
> > > > > systems have the trace unit and the CPUs in the same power domain.
> > > >
> > > > I understand, which is specifically why this comes with an option to handle
> > > > such cases.
> > > >
> > >
> > > OK
>
> I'll update the cover letter and commit messages to reflect that this
> option is present. (And likewise for conditionally saving/restoring the
> registers only if coresight is in use).
>
> > >
> > > > >
> > > > > Just curious if this reported on any platforms ?
> > > > >
> > > >
> > > > I have heard people complaining about this, but not sure about the exact
> > > > platform(s) affected.
> >
> > Are you referring to platforms that ignore the TRCPDCR.PU bit?  If so
> > Juno is the only one that does _not_ ignore it, hence the need to find
> > another solution.
> >
> > > >
> > >
> > > One we add mechanism in place, platform need to advertise that it's
> > > broken in firmware(DT/ACPI). Or just have a blacklist if we don't
> > > want to add anything extra to the firmware(DT/ACPI) ?
> > >
> > > > > I wounder if we can use TRCPDSR(Power Down Status Register) to check the
> > > > > status. I know on Juno, it doesn't loose context rather the power down
> > > > > is emulated and saving/restoring may not be needed at all. Have you
> > > > > tested on Juno with and without these patches and seen any difference ?
> > > >
> > > > The problem is trace unit looses power the moment CPU goes to low power mode
> > > > and if we try to read this register, it could cause unexpected side-effects.
> > > >
> > >
> > > No I meant before CPU loose power i.e. in CPU_ENTER case. However I do
> > > remember you/Andrew mentioning that even that may be bogus on broken
> > > systems, so firmware is only way to avoid penalising all platforms IMO.
> >
> > I wouldn't assume that anything is working properly.
>
> Thanks,
>
> Andrew Murray
>
> >
> > >
> > > Or other option is to stop the coresight tracing session like we do
> > > for PMUs or not entering idle when there's any active coresight session
> > > in progress on such platforms.
> > >
> > > --
> > > Regards,
> > > Sudeep
Andrew Murray June 20, 2019, 3:11 p.m. UTC | #10
On Thu, Jun 20, 2019 at 08:49:47AM -0600, Mathieu Poirier wrote:
> Hi Andrew,
> 
> On Thu, 20 Jun 2019 at 05:07, Andrew Murray <andrew.murray@arm.com> wrote:
> >
> > On Tue, Jun 18, 2019 at 04:55:49PM -0600, Mathieu Poirier wrote:
> > > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > > > Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > > > to hardware that power should not be removed from the trace unit.
> > > > Let's mitigate against this by saving and restoring the trace
> > > > unit state when the CPU enters low power states.
> > > >
> > > > To provide the benefit to both self-hosted and external debuggers
> > > > we save/restore the entire state which includes etmv4_config data
> > > > and dynamic data such as inflight counter values, sequencer
> > > > states, etc.
> > > >
> > > > To reduce CPU suspend/resume latency the state is only saved or
> > > > restored if coresight is in use as determined by the claimset
> > > > registers.
> > > >
> > > > To aid debug of CPU suspend/resume a disable_pm_save parameter
> > > > is provided to disable this feature.
> > > >
> > > > Signed-off-by: Andrew Murray <andrew.murray@arm.com>
> > > > ---
> > > >  drivers/hwtracing/coresight/coresight-etm4x.c | 245 ++++++++++++++++++
> > > >  drivers/hwtracing/coresight/coresight-etm4x.h |  66 ++++-
> > > >  drivers/hwtracing/coresight/coresight.c       |   2 +-
> > > >  include/linux/coresight.h                     |   7 +
> > > >  4 files changed, 318 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
> > > > index bda90d4cd62b..d27c5e0d9aec 100644
> > > > --- a/drivers/hwtracing/coresight/coresight-etm4x.c
> > > > +++ b/drivers/hwtracing/coresight/coresight-etm4x.c
> > > > @@ -18,6 +18,7 @@
> > > >  #include <linux/stat.h>
> > > >  #include <linux/clk.h>
> > > >  #include <linux/cpu.h>
> > > > +#include <linux/cpu_pm.h>
> > > >  #include <linux/coresight.h>
> > > >  #include <linux/coresight-pmu.h>
> > > >  #include <linux/pm_wakeup.h>
> > > > @@ -36,6 +37,9 @@
> > > >  static int boot_enable;
> > > >  module_param_named(boot_enable, boot_enable, int, 0444);
> > > >
> > > > +static int disable_pm_save;
> > > > +module_param_named(disable_pm_save, disable_pm_save, int, 0444);
> > > > +
> > > >  /* The number of ETMv4 currently registered */
> > > >  static int etm4_count;
> > > >  static struct etmv4_drvdata *etmdrvdata[NR_CPUS];
> > > > @@ -53,6 +57,14 @@ static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
> > > >     isb();
> > > >  }
> > > >
> > > > +static void etm4_os_lock(struct etmv4_drvdata *drvdata)
> > > > +{
> > > > +   /* Writing 0x1 to TRCOSLAR unlocks the trace registers */
> > > > +   writel_relaxed(0x1, drvdata->base + TRCOSLAR);
> > > > +   drvdata->os_unlock = false;
> > > > +   isb();
> > > > +}
> > > > +
> > > >  static bool etm4_arch_supported(u8 arch)
> > > >  {
> > > >     /* Mask out the minor version number */
> > > > @@ -1076,6 +1088,235 @@ static void etm4_init_trace_id(struct etmv4_drvdata *drvdata)
> > > >     drvdata->trcid = coresight_get_trace_id(drvdata->cpu);
> > > >  }
> > > >
> > > > +#ifdef CONFIG_CPU_PM
> > > > +static void etm4_cpu_save(struct etmv4_drvdata *drvdata)
> > > > +{
> > > > +   int i;
> > > > +   u32 control;
> > > > +   struct etmv4_save_state *state;
> > >
> > > Before going any further I would make sure the CPU this is running on it equal
> > > to drvdata->cpu.  Otherwise something very wrong happened.
> > >
> >
> > Sure I'll add that.
> >
> > > > +
> > > > +   /* As recommended by 3.4.1 of ARM IHI 0064D */
> > > > +   dsb(sy);
> > > > +   isb();
> > > > +
> > > > +   CS_UNLOCK(drvdata->base);
> > > > +   etm4_os_lock(drvdata);
> > >
> > > Please add a comment to explain that you are using the OS lock to disable
> > > external debugger access to the trace registers while the unit is powered down.
> > > Otherwise people will get confused and will submit patches that changes
> > > etm4_os_lock() to etm4_os_unlock().
> >
> > Yes sure, it deserves a comment.
> >
> > >
> > > > +
> > > > +   /* wait for TRCSTATR.PMSTABLE to go up */
> > > > +   if (coresight_timeout(drvdata->base, TRCSTATR,
> > > > +                                   TRCSTATR_PMSTABLE_BIT, 1))
> > > > +           dev_err(drvdata->dev,
> > > > +                   "timeout while waiting for Idle Trace Status\n");
> > >
> > > The above comment is not accurate since we are waiting for the PMSTABLE bit.
> >
> > I'll change that.
> >
> > >
> > > > +
> > > > +   state = &drvdata->save_state;
> > > > +
> > > > +   state->trcprgctlr = readl(drvdata->base + TRCPRGCTLR);
> > > > +   state->trcprocselr = readl(drvdata->base + TRCPROCSELR);
> > > > +   state->trcconfigr = readl(drvdata->base + TRCCONFIGR);
> > > > +   state->trcauxctlr = readl(drvdata->base + TRCAUXCTLR);
> > > > +   state->trceventctl0r = readl(drvdata->base + TRCEVENTCTL0R);
> > > > +   state->trceventctl1r = readl(drvdata->base + TRCEVENTCTL1R);
> > > > +   state->trcstallctlr = readl(drvdata->base + TRCSTALLCTLR);
> > > > +   state->trctsctlr = readl(drvdata->base + TRCTSCTLR);
> > > > +   state->trcsyncpr = readl(drvdata->base + TRCSYNCPR);
> > > > +   state->trcccctlr = readl(drvdata->base + TRCCCCTLR);
> > > > +   state->trcbbctlr = readl(drvdata->base + TRCBBCTLR);
> > > > +   state->trctraceidr = readl(drvdata->base + TRCTRACEIDR);
> > > > +   state->trcqctlr = readl(drvdata->base + TRCQCTLR);
> > > > +
> > > > +   state->trcvictlr = readl(drvdata->base + TRCVICTLR);
> > > > +   state->trcviiectlr = readl(drvdata->base + TRCVIIECTLR);
> > > > +   state->trcvissctlr = readl(drvdata->base + TRCVISSCTLR);
> > > > +   state->trcvipcssctlr = readl(drvdata->base + TRCVIPCSSCTLR);
> > > > +   state->trcvdctlr = readl(drvdata->base + TRCVDCTLR);
> > > > +   state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR);
> > > > +   state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR);
> > > > +
> > > > +   for (i = 0; i < drvdata->nrseqstate; i++)
> > > > +           state->trcseqevr[i] = readl(drvdata->base + TRCSEQEVRn(i));
> > > > +
> > > > +   state->trcseqrstevr = readl(drvdata->base + TRCSEQRSTEVR);
> > > > +   state->trcseqstr = readl(drvdata->base + TRCSEQSTR);
> > > > +   state->trcextinselr = readl(drvdata->base + TRCEXTINSELR);
> > > > +
> > > > +   for (i = 0; i < drvdata->nr_cntr; i++) {
> > > > +           state->trccntrldvr[i] = readl(drvdata->base + TRCCNTRLDVRn(i));
> > > > +           state->trccntctlr[i] = readl(drvdata->base + TRCCNTCTLRn(i));
> > > > +           state->trccntvr[i] = readl(drvdata->base + TRCCNTVRn(i));
> > > > +   }
> > > > +
> > > > +   for (i = 0; i < drvdata->nr_resource * 2; i++)
> > > > +           state->trcrsctlr[i] = readl(drvdata->base + TRCRSCTLRn(i));
> > > > +
> > > > +   for (i = 0; i < drvdata->nr_ss_cmp; i++) {
> > > > +           state->trcssccr[i] = readl(drvdata->base + TRCSSCCRn(i));
> > > > +           state->trcsscsr[i] = readl(drvdata->base + TRCSSCSRn(i));
> > > > +           state->trcsspcicr[i] = readl(drvdata->base + TRCSSPCICRn(i));
> > > > +   }
> > > > +
> > > > +   for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> > > > +           state->trcacvr[i] = readl(drvdata->base + TRCACVRn(i));
> > > > +           state->trcacatr[i] = readl(drvdata->base + TRCACATRn(i));
> > > > +   }
> > > > +
> > > > +   for (i = 0; i < drvdata->numcidc; i++)
> > > > +           state->trccidcvr[i] = readl(drvdata->base + TRCCIDCVRn(i));
> > > > +
> > > > +   for (i = 0; i < drvdata->numvmidc; i++)
> > > > +           state->trcvmidcvr[i] = readl(drvdata->base + TRCVMIDCVRn(i));
> > > > +
> > > > +   state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0);
> > > > +   state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1);
> > > > +
> > > > +   state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0);
> > > > +   state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR1);
> > > > +
> > > > +   state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR);
> > > > +
> > > > +   /* wait for TRCSTATR.IDLE to go up */
> > > > +   if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
> > > > +           dev_err(drvdata->dev,
> > > > +                   "timeout while waiting for Idle Trace Status\n");
> > > > +
> > > > +   /* power can be removed from the trace unit now */
> > > > +   control = readl_relaxed(drvdata->base + TRCPDCR);
> > > > +   control &= ~TRCPDCR_PU;
> > > > +   writel_relaxed(control, drvdata->base + TRCPDCR);
> > > > +
> > > > +   CS_LOCK(drvdata->base);
> > > > +}
> > > > +
> > > > +static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
> > > > +{
> > > > +   int i;
> > > > +   struct etmv4_save_state *state;
> > > > +
> > > > +   state = &drvdata->save_state;
> > >
> > > Same comment as above about the running CPU.
> > >
> > > > +
> > > > +   CS_UNLOCK(drvdata->base);
> > > > +
> > > > +   writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
> > > > +
> > > > +   writel_relaxed(state->trcprgctlr, drvdata->base + TRCPRGCTLR);
> > > > +   writel_relaxed(state->trcprocselr, drvdata->base + TRCPROCSELR);
> > > > +   writel_relaxed(state->trcconfigr, drvdata->base + TRCCONFIGR);
> > > > +   writel_relaxed(state->trcauxctlr, drvdata->base + TRCAUXCTLR);
> > > > +   writel_relaxed(state->trceventctl0r, drvdata->base + TRCEVENTCTL0R);
> > > > +   writel_relaxed(state->trceventctl1r, drvdata->base + TRCEVENTCTL1R);
> > > > +   writel_relaxed(state->trcstallctlr, drvdata->base + TRCSTALLCTLR);
> > > > +   writel_relaxed(state->trctsctlr, drvdata->base + TRCTSCTLR);
> > > > +   writel_relaxed(state->trcsyncpr, drvdata->base + TRCSYNCPR);
> > > > +   writel_relaxed(state->trcccctlr, drvdata->base + TRCCCCTLR);
> > > > +   writel_relaxed(state->trcbbctlr, drvdata->base + TRCBBCTLR);
> > > > +   writel_relaxed(state->trctraceidr, drvdata->base + TRCTRACEIDR);
> > > > +   writel_relaxed(state->trcqctlr, drvdata->base + TRCQCTLR);
> > > > +
> > > > +   writel_relaxed(state->trcvictlr, drvdata->base + TRCVICTLR);
> > > > +   writel_relaxed(state->trcviiectlr, drvdata->base + TRCVIIECTLR);
> > > > +   writel_relaxed(state->trcvissctlr, drvdata->base + TRCVISSCTLR);
> > > > +   writel_relaxed(state->trcvipcssctlr, drvdata->base + TRCVIPCSSCTLR);
> > > > +   writel_relaxed(state->trcvdctlr, drvdata->base + TRCVDCTLR);
> > > > +   writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR);
> > > > +   writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR);
> > > > +
> > > > +   for (i = 0; i < drvdata->nrseqstate; i++)
> > > > +           writel_relaxed(state->trcseqevr[i],
> > > > +                                   drvdata->base + TRCSEQEVRn(i));
> > > > +
> > > > +   writel_relaxed(state->trcseqrstevr, drvdata->base + TRCSEQRSTEVR);
> > > > +   writel_relaxed(state->trcseqstr, drvdata->base + TRCSEQSTR);
> > > > +   writel_relaxed(state->trcextinselr, drvdata->base + TRCEXTINSELR);
> > > > +
> > > > +   for (i = 0; i < drvdata->nr_cntr; i++) {
> > > > +           writel_relaxed(state->trccntrldvr[i],
> > > > +                                   drvdata->base + TRCCNTRLDVRn(i));
> > > > +           writel_relaxed(state->trccntctlr[i],
> > > > +                                   drvdata->base + TRCCNTCTLRn(i));
> > > > +           writel_relaxed(state->trccntvr[i],
> > > > +                                   drvdata->base + TRCCNTVRn(i));
> > > > +   }
> > > > +
> > > > +   for (i = 0; i < drvdata->nr_resource * 2; i++)
> > > > +           writel_relaxed(state->trcrsctlr[i],
> > > > +                                   drvdata->base + TRCRSCTLRn(i));
> > > > +
> > > > +   for (i = 0; i < drvdata->nr_ss_cmp; i++) {
> > > > +           writel_relaxed(state->trcssccr[i],
> > > > +                                   drvdata->base + TRCSSCCRn(i));
> > > > +           writel_relaxed(state->trcsscsr[i],
> > > > +                                   drvdata->base + TRCSSCSRn(i));
> > > > +           writel_relaxed(state->trcsspcicr[i],
> > > > +                                   drvdata->base + TRCSSPCICRn(i));
> > > > +   }
> > > > +
> > > > +   for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> > > > +           writel_relaxed(state->trcacvr[i],
> > > > +                                   drvdata->base + TRCACVRn(i));
> > > > +           writel_relaxed(state->trcacatr[i],
> > > > +                                   drvdata->base + TRCACATRn(i));
> > > > +   }
> > > > +
> > > > +   for (i = 0; i < drvdata->numcidc; i++)
> > > > +           writel_relaxed(state->trccidcvr[i],
> > > > +                                   drvdata->base + TRCCIDCVRn(i));
> > > > +
> > > > +   for (i = 0; i < drvdata->numvmidc; i++)
> > > > +           writel_relaxed(state->trcvmidcvr[i],
> > > > +                                   drvdata->base + TRCVMIDCVRn(i));
> > > > +
> > > > +   writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0);
> > > > +   writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1);
> > > > +
> > > > +   writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0);
> > > > +   writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR1);
> > > > +
> > > > +   writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
> > > > +
> > > > +   /* As recommended by 4.3.7 of ARM IHI 0064D */
> > > > +   dsb(sy);
> > > > +   isb();
> > > > +
> > > > +   etm4_os_unlock(drvdata);
> > >
> > > Same comment as above.
> > >
> > > > +   CS_LOCK(drvdata->base);
> > > > +}
> > > > +
> > > > +static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
> > > > +                         void *v)
> > > > +{
> > > > +   struct etmv4_drvdata *drvdata = container_of(nb,
> > > > +                                   struct etmv4_drvdata, nb);
> > > > +
> > > > +   if (disable_pm_save)
> > > > +           return NOTIFY_OK;
> > > > +
> > > > +   switch (cmd) {
> > > > +   case CPU_PM_ENTER:
> > > > +           /* save the state if coresight is in use */
> > > > +           if (coresight_is_claimed_any(drvdata->base))
> > >
> > > claimed_any()? At this point if coresight_is_claimed_self_hosted() == false an
> > > external agent is competing with the framework and we should abdicate.
> >
> > If we only support save/restore for self-hosted, then we don't actually need
> > to store as much state as much of it is in the etmv4_config structure.
> >
> > My thinking here was that if an external agent is being used and we power down
> > then we'd also potentially suffer the same issue where state is lost. So
> > saving/restoring may be helpful for external agents as well (or at least
> > wouldn't do harm)...
> 
> There is no point in trying to cover cases where external agents are
> involved - we will always get it wrong.

OK, given that I don't know of any particular cases where this is an issue
I'm happy to change this to !coresight_is_claimed_self_hosted().

>  And the notifiers shouldn't
> return immediately if a tracer is not being used.

What should they do? We only need to save/restore when there is an active
session don't we? Have I misunderstood?

Thanks,

Andrew Murray

> 
> >
> > However I don't know if this is a real issue.
> >
> > >
> > > > +                   etm4_cpu_save(drvdata);
> > > > +           break;
> > > > +   case CPU_PM_EXIT:
> > > > +   case CPU_PM_ENTER_FAILED:
> > > > +           /* trcclaimset is set when there is state to restore */
> > > > +           if (drvdata->save_state.trcclaimset)
> > > > +                   etm4_cpu_restore(drvdata);
> > > > +           break;
> > > > +   default:
> > > > +           return NOTIFY_DONE;
> > > > +   }
> > > > +
> > > > +   return NOTIFY_OK;
> > > > +}
> > > > +
> > > > +static int etm4_cpu_pm_register(struct etmv4_drvdata *drvdata)
> > > > +{
> > > > +   drvdata->nb.notifier_call = etm4_cpu_pm_notify;
> > > > +   return cpu_pm_register_notifier(&drvdata->nb);
> > > > +}
> > > > +#else
> > > > +static int etm4_cpu_pm_register(struct etmv4_drvdata *drvdata) { return 0; }
> > > > +#endif
> > > > +
> > > >  static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
> > > >  {
> > > >     int ret;
> > > > @@ -1141,6 +1382,10 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
> > > >     etm4_init_trace_id(drvdata);
> > > >     etm4_set_default(&drvdata->config);
> > > >
> > > > +   ret = etm4_cpu_pm_register(drvdata);
> > > > +   if (ret)
> > > > +           goto err_arch_supported;
> > > > +
> > > >     desc.type = CORESIGHT_DEV_TYPE_SOURCE;
> > > >     desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
> > > >     desc.ops = &etm4_cs_ops;
> > > > diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
> > > > index 52786e9d8926..f4cff447c8a1 100644
> > > > --- a/drivers/hwtracing/coresight/coresight-etm4x.h
> > > > +++ b/drivers/hwtracing/coresight/coresight-etm4x.h
> > > > @@ -174,7 +174,8 @@
> > > >                                      ETM_MODE_EXCL_KERN | \
> > > >                                      ETM_MODE_EXCL_USER)
> > > >
> > > > -#define TRCSTATR_IDLE_BIT          0
> > > > +#define TRCSTATR_IDLE_BIT          BIT(0)
> > > > +#define TRCSTATR_PMSTABLE_BIT              BIT(1)
> > > >  #define ETM_DEFAULT_ADDR_COMP              0
> > > >
> > > >  /* PowerDown Control Register bits */
> > > > @@ -281,6 +282,65 @@ struct etmv4_config {
> > > >     u32                             ext_inp;
> > > >  };
> > > >
> > > > +/**
> > > > + * struct etm4_save_state - state to be preserved when ETM is without power
> > > > + */
> > > > +struct etmv4_save_state {
> > > > +   u32     trcprgctlr;
> > > > +   u32     trcprocselr;
> > > > +   u32     trcconfigr;
> > > > +   u32     trcauxctlr;
> > > > +   u32     trceventctl0r;
> > > > +   u32     trceventctl1r;
> > > > +   u32     trcstallctlr;
> > > > +   u32     trctsctlr;
> > > > +   u32     trcsyncpr;
> > > > +   u32     trcccctlr;
> > > > +   u32     trcbbctlr;
> > > > +   u32     trctraceidr;
> > > > +   u32     trcqctlr;
> > > > +
> > > > +   u32     trcvictlr;
> > > > +   u32     trcviiectlr;
> > > > +   u32     trcvissctlr;
> > > > +   u32     trcvipcssctlr;
> > > > +   u32     trcvdctlr;
> > > > +   u32     trcvdsacctlr;
> > > > +   u32     trcvdarcctlr;
> > > > +
> > > > +   u32     trcseqevr[ETM_MAX_SEQ_STATES];
> > > > +   u32     trcseqrstevr;
> > > > +   u32     trcseqstr;
> > > > +   u32     trcextinselr;
> > > > +   u32     trccntrldvr[ETMv4_MAX_CNTR];
> > > > +   u32     trccntctlr[ETMv4_MAX_CNTR];
> > > > +   u32     trccntvr[ETMv4_MAX_CNTR];
> > > > +
> > > > +   u32     trcrsctlr[ETM_MAX_RES_SEL * 2];
> > > > +
> > > > +   u32     trcssccr[ETM_MAX_SS_CMP];
> > > > +   u32     trcsscsr[ETM_MAX_SS_CMP];
> > > > +   u32     trcsspcicr[ETM_MAX_SS_CMP];
> > > > +
> > > > +   u64     trcacvr[ETM_MAX_SINGLE_ADDR_CMP];
> > > > +   u64     trcacatr[ETM_MAX_SINGLE_ADDR_CMP];
> > > > +   u64     trcdvcvr[ETM_MAX_DATA_VAL_CMP];
> > > > +   u64     trcdvcmr[ETM_MAX_DATA_VAL_CMP];
> > > > +   u64     trccidcvr[ETMv4_MAX_CTXID_CMP];
> > > > +   u32     trcvmidcvr[ETM_MAX_VMID_CMP];
> > > > +   u32     trccidcctlr0;
> > > > +   u32     trccidcctlr1;
> > > > +   u32     trcvmidcctlr0;
> > > > +   u32     trcvmidcctlr1;
> > > > +
> > > > +   u32     trcclaimset;
> > > > +
> > > > +   u32     cntr_val[ETMv4_MAX_CNTR];
> > > > +   u32     seq_state;
> > > > +   u32     vinst_ctrl;
> > > > +   u32     ss_status[ETM_MAX_SS_CMP];
> > > > +};
> > > > +
> > > >  /**
> > > >   * struct etm4_drvdata - specifics associated to an ETM component
> > > >   * @base:       Memory mapped base address for this component.
> > > > @@ -337,6 +397,8 @@ struct etmv4_config {
> > > >   * @atbtrig:       If the implementation can support ATB triggers
> > > >   * @lpoverride:    If the implementation can support low-power state over.
> > > >   * @config:        structure holding configuration parameters.
> > > > + * @save_state:    State to be preserved across power loss
> > > > + * @nb:            CPU PM notifier
> > > >   */
> > > >  struct etmv4_drvdata {
> > > >     void __iomem                    *base;
> > > > @@ -383,6 +445,8 @@ struct etmv4_drvdata {
> > > >     bool                            atbtrig;
> > > >     bool                            lpoverride;
> > > >     struct etmv4_config             config;
> > > > +   struct etmv4_save_state         save_state;
> > > > +   struct notifier_block           nb;
> > > >  };
> > > >
> > > >  /* Address comparator access types */
> > > > diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
> > > > index 4b130281236a..e85d09e597a0 100644
> > > > --- a/drivers/hwtracing/coresight/coresight.c
> > > > +++ b/drivers/hwtracing/coresight/coresight.c
> > > > @@ -140,7 +140,7 @@ static inline bool coresight_is_claimed_self_hosted(void __iomem *base)
> > > >     return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED;
> > > >  }
> > > >
> > > > -static inline bool coresight_is_claimed_any(void __iomem *base)
> > > > +bool coresight_is_claimed_any(void __iomem *base)
> > > >  {
> > > >     return coresight_read_claim_tags(base) != 0;
> > > >  }
> > > > diff --git a/include/linux/coresight.h b/include/linux/coresight.h
> > > > index 62a520df8add..4f7ba923ffc4 100644
> > > > --- a/include/linux/coresight.h
> > > > +++ b/include/linux/coresight.h
> > > > @@ -268,6 +268,8 @@ extern int coresight_claim_device_unlocked(void __iomem *base);
> > > >  extern void coresight_disclaim_device(void __iomem *base);
> > > >  extern void coresight_disclaim_device_unlocked(void __iomem *base);
> > > >
> > > > +extern bool coresight_is_claimed_any(void __iomem *base);
> > > > +
> > > >  #else
> > > >  static inline struct coresight_device *
> > > >  coresight_register(struct coresight_desc *desc) { return NULL; }
> > > > @@ -290,6 +292,11 @@ static inline int coresight_claim_device(void __iomem *base)
> > > >  static inline void coresight_disclaim_device(void __iomem *base) {}
> > > >  static inline void coresight_disclaim_device_unlocked(void __iomem *base) {}
> > > >
> > > > +static inline bool coresight_is_claimed_any(void __iomem *base)
> > > > +{
> > > > +   return false;
> > > > +}
> > > > +
> > >
> > > I wanted to test your code but it doesn't apply on the CS next branch:
> > >
> > > https://git.linaro.org/kernel/coresight.git/log/?h=next
> >
> > Oh sorry about that, this was ontop of v5.2-rc5, I'll rebase to the CS branch
> > on the next iteration.
> >
> > Thanks for the responsive feedback.
> >
> > Andrew Murray
> >
> > >
> > > Thanks,
> > > Mathieu
> > >
> > > >  #endif
> > > >
> > > >  #ifdef CONFIG_OF
> > > > --
> > > > 2.21.0
> > > >
Mathieu Poirier June 20, 2019, 3:26 p.m. UTC | #11
On Thu, Jun 20, 2019 at 04:11:34PM +0100, Andrew Murray wrote:
> On Thu, Jun 20, 2019 at 08:49:47AM -0600, Mathieu Poirier wrote:
> > Hi Andrew,
> > 
> > On Thu, 20 Jun 2019 at 05:07, Andrew Murray <andrew.murray@arm.com> wrote:
> > >
> > > On Tue, Jun 18, 2019 at 04:55:49PM -0600, Mathieu Poirier wrote:
> > > > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > > > > Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > > > > to hardware that power should not be removed from the trace unit.
> > > > > Let's mitigate against this by saving and restoring the trace
> > > > > unit state when the CPU enters low power states.
> > > > >
> > > > > To provide the benefit to both self-hosted and external debuggers
> > > > > we save/restore the entire state which includes etmv4_config data
> > > > > and dynamic data such as inflight counter values, sequencer
> > > > > states, etc.
> > > > >
> > > > > To reduce CPU suspend/resume latency the state is only saved or
> > > > > restored if coresight is in use as determined by the claimset
> > > > > registers.
> > > > >
> > > > > To aid debug of CPU suspend/resume a disable_pm_save parameter
> > > > > is provided to disable this feature.
> > > > >
> > > > > Signed-off-by: Andrew Murray <andrew.murray@arm.com>
> > > > > ---
> > > > >  drivers/hwtracing/coresight/coresight-etm4x.c | 245 ++++++++++++++++++
> > > > >  drivers/hwtracing/coresight/coresight-etm4x.h |  66 ++++-
> > > > >  drivers/hwtracing/coresight/coresight.c       |   2 +-
> > > > >  include/linux/coresight.h                     |   7 +
> > > > >  4 files changed, 318 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
> > > > > index bda90d4cd62b..d27c5e0d9aec 100644
> > > > > --- a/drivers/hwtracing/coresight/coresight-etm4x.c
> > > > > +++ b/drivers/hwtracing/coresight/coresight-etm4x.c
> > > > > @@ -18,6 +18,7 @@
> > > > >  #include <linux/stat.h>
> > > > >  #include <linux/clk.h>
> > > > >  #include <linux/cpu.h>
> > > > > +#include <linux/cpu_pm.h>
> > > > >  #include <linux/coresight.h>
> > > > >  #include <linux/coresight-pmu.h>
> > > > >  #include <linux/pm_wakeup.h>
> > > > > @@ -36,6 +37,9 @@
> > > > >  static int boot_enable;
> > > > >  module_param_named(boot_enable, boot_enable, int, 0444);
> > > > >
> > > > > +static int disable_pm_save;
> > > > > +module_param_named(disable_pm_save, disable_pm_save, int, 0444);
> > > > > +
> > > > >  /* The number of ETMv4 currently registered */
> > > > >  static int etm4_count;
> > > > >  static struct etmv4_drvdata *etmdrvdata[NR_CPUS];
> > > > > @@ -53,6 +57,14 @@ static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
> > > > >     isb();
> > > > >  }
> > > > >
> > > > > +static void etm4_os_lock(struct etmv4_drvdata *drvdata)
> > > > > +{
> > > > > +   /* Writing 0x1 to TRCOSLAR unlocks the trace registers */
> > > > > +   writel_relaxed(0x1, drvdata->base + TRCOSLAR);
> > > > > +   drvdata->os_unlock = false;
> > > > > +   isb();
> > > > > +}
> > > > > +
> > > > >  static bool etm4_arch_supported(u8 arch)
> > > > >  {
> > > > >     /* Mask out the minor version number */
> > > > > @@ -1076,6 +1088,235 @@ static void etm4_init_trace_id(struct etmv4_drvdata *drvdata)
> > > > >     drvdata->trcid = coresight_get_trace_id(drvdata->cpu);
> > > > >  }
> > > > >
> > > > > +#ifdef CONFIG_CPU_PM
> > > > > +static void etm4_cpu_save(struct etmv4_drvdata *drvdata)
> > > > > +{
> > > > > +   int i;
> > > > > +   u32 control;
> > > > > +   struct etmv4_save_state *state;
> > > >
> > > > Before going any further I would make sure the CPU this is running on it equal
> > > > to drvdata->cpu.  Otherwise something very wrong happened.
> > > >
> > >
> > > Sure I'll add that.
> > >
> > > > > +
> > > > > +   /* As recommended by 3.4.1 of ARM IHI 0064D */
> > > > > +   dsb(sy);
> > > > > +   isb();
> > > > > +
> > > > > +   CS_UNLOCK(drvdata->base);
> > > > > +   etm4_os_lock(drvdata);
> > > >
> > > > Please add a comment to explain that you are using the OS lock to disable
> > > > external debugger access to the trace registers while the unit is powered down.
> > > > Otherwise people will get confused and will submit patches that changes
> > > > etm4_os_lock() to etm4_os_unlock().
> > >
> > > Yes sure, it deserves a comment.
> > >
> > > >
> > > > > +
> > > > > +   /* wait for TRCSTATR.PMSTABLE to go up */
> > > > > +   if (coresight_timeout(drvdata->base, TRCSTATR,
> > > > > +                                   TRCSTATR_PMSTABLE_BIT, 1))
> > > > > +           dev_err(drvdata->dev,
> > > > > +                   "timeout while waiting for Idle Trace Status\n");
> > > >
> > > > The above comment is not accurate since we are waiting for the PMSTABLE bit.
> > >
> > > I'll change that.
> > >
> > > >
> > > > > +
> > > > > +   state = &drvdata->save_state;
> > > > > +
> > > > > +   state->trcprgctlr = readl(drvdata->base + TRCPRGCTLR);
> > > > > +   state->trcprocselr = readl(drvdata->base + TRCPROCSELR);
> > > > > +   state->trcconfigr = readl(drvdata->base + TRCCONFIGR);
> > > > > +   state->trcauxctlr = readl(drvdata->base + TRCAUXCTLR);
> > > > > +   state->trceventctl0r = readl(drvdata->base + TRCEVENTCTL0R);
> > > > > +   state->trceventctl1r = readl(drvdata->base + TRCEVENTCTL1R);
> > > > > +   state->trcstallctlr = readl(drvdata->base + TRCSTALLCTLR);
> > > > > +   state->trctsctlr = readl(drvdata->base + TRCTSCTLR);
> > > > > +   state->trcsyncpr = readl(drvdata->base + TRCSYNCPR);
> > > > > +   state->trcccctlr = readl(drvdata->base + TRCCCCTLR);
> > > > > +   state->trcbbctlr = readl(drvdata->base + TRCBBCTLR);
> > > > > +   state->trctraceidr = readl(drvdata->base + TRCTRACEIDR);
> > > > > +   state->trcqctlr = readl(drvdata->base + TRCQCTLR);
> > > > > +
> > > > > +   state->trcvictlr = readl(drvdata->base + TRCVICTLR);
> > > > > +   state->trcviiectlr = readl(drvdata->base + TRCVIIECTLR);
> > > > > +   state->trcvissctlr = readl(drvdata->base + TRCVISSCTLR);
> > > > > +   state->trcvipcssctlr = readl(drvdata->base + TRCVIPCSSCTLR);
> > > > > +   state->trcvdctlr = readl(drvdata->base + TRCVDCTLR);
> > > > > +   state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR);
> > > > > +   state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR);
> > > > > +
> > > > > +   for (i = 0; i < drvdata->nrseqstate; i++)
> > > > > +           state->trcseqevr[i] = readl(drvdata->base + TRCSEQEVRn(i));
> > > > > +
> > > > > +   state->trcseqrstevr = readl(drvdata->base + TRCSEQRSTEVR);
> > > > > +   state->trcseqstr = readl(drvdata->base + TRCSEQSTR);
> > > > > +   state->trcextinselr = readl(drvdata->base + TRCEXTINSELR);
> > > > > +
> > > > > +   for (i = 0; i < drvdata->nr_cntr; i++) {
> > > > > +           state->trccntrldvr[i] = readl(drvdata->base + TRCCNTRLDVRn(i));
> > > > > +           state->trccntctlr[i] = readl(drvdata->base + TRCCNTCTLRn(i));
> > > > > +           state->trccntvr[i] = readl(drvdata->base + TRCCNTVRn(i));
> > > > > +   }
> > > > > +
> > > > > +   for (i = 0; i < drvdata->nr_resource * 2; i++)
> > > > > +           state->trcrsctlr[i] = readl(drvdata->base + TRCRSCTLRn(i));
> > > > > +
> > > > > +   for (i = 0; i < drvdata->nr_ss_cmp; i++) {
> > > > > +           state->trcssccr[i] = readl(drvdata->base + TRCSSCCRn(i));
> > > > > +           state->trcsscsr[i] = readl(drvdata->base + TRCSSCSRn(i));
> > > > > +           state->trcsspcicr[i] = readl(drvdata->base + TRCSSPCICRn(i));
> > > > > +   }
> > > > > +
> > > > > +   for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> > > > > +           state->trcacvr[i] = readl(drvdata->base + TRCACVRn(i));
> > > > > +           state->trcacatr[i] = readl(drvdata->base + TRCACATRn(i));
> > > > > +   }
> > > > > +
> > > > > +   for (i = 0; i < drvdata->numcidc; i++)
> > > > > +           state->trccidcvr[i] = readl(drvdata->base + TRCCIDCVRn(i));
> > > > > +
> > > > > +   for (i = 0; i < drvdata->numvmidc; i++)
> > > > > +           state->trcvmidcvr[i] = readl(drvdata->base + TRCVMIDCVRn(i));
> > > > > +
> > > > > +   state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0);
> > > > > +   state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1);
> > > > > +
> > > > > +   state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0);
> > > > > +   state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR1);
> > > > > +
> > > > > +   state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR);
> > > > > +
> > > > > +   /* wait for TRCSTATR.IDLE to go up */
> > > > > +   if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
> > > > > +           dev_err(drvdata->dev,
> > > > > +                   "timeout while waiting for Idle Trace Status\n");
> > > > > +
> > > > > +   /* power can be removed from the trace unit now */
> > > > > +   control = readl_relaxed(drvdata->base + TRCPDCR);
> > > > > +   control &= ~TRCPDCR_PU;
> > > > > +   writel_relaxed(control, drvdata->base + TRCPDCR);
> > > > > +
> > > > > +   CS_LOCK(drvdata->base);
> > > > > +}
> > > > > +
> > > > > +static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
> > > > > +{
> > > > > +   int i;
> > > > > +   struct etmv4_save_state *state;
> > > > > +
> > > > > +   state = &drvdata->save_state;
> > > >
> > > > Same comment as above about the running CPU.
> > > >
> > > > > +
> > > > > +   CS_UNLOCK(drvdata->base);
> > > > > +
> > > > > +   writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
> > > > > +
> > > > > +   writel_relaxed(state->trcprgctlr, drvdata->base + TRCPRGCTLR);
> > > > > +   writel_relaxed(state->trcprocselr, drvdata->base + TRCPROCSELR);
> > > > > +   writel_relaxed(state->trcconfigr, drvdata->base + TRCCONFIGR);
> > > > > +   writel_relaxed(state->trcauxctlr, drvdata->base + TRCAUXCTLR);
> > > > > +   writel_relaxed(state->trceventctl0r, drvdata->base + TRCEVENTCTL0R);
> > > > > +   writel_relaxed(state->trceventctl1r, drvdata->base + TRCEVENTCTL1R);
> > > > > +   writel_relaxed(state->trcstallctlr, drvdata->base + TRCSTALLCTLR);
> > > > > +   writel_relaxed(state->trctsctlr, drvdata->base + TRCTSCTLR);
> > > > > +   writel_relaxed(state->trcsyncpr, drvdata->base + TRCSYNCPR);
> > > > > +   writel_relaxed(state->trcccctlr, drvdata->base + TRCCCCTLR);
> > > > > +   writel_relaxed(state->trcbbctlr, drvdata->base + TRCBBCTLR);
> > > > > +   writel_relaxed(state->trctraceidr, drvdata->base + TRCTRACEIDR);
> > > > > +   writel_relaxed(state->trcqctlr, drvdata->base + TRCQCTLR);
> > > > > +
> > > > > +   writel_relaxed(state->trcvictlr, drvdata->base + TRCVICTLR);
> > > > > +   writel_relaxed(state->trcviiectlr, drvdata->base + TRCVIIECTLR);
> > > > > +   writel_relaxed(state->trcvissctlr, drvdata->base + TRCVISSCTLR);
> > > > > +   writel_relaxed(state->trcvipcssctlr, drvdata->base + TRCVIPCSSCTLR);
> > > > > +   writel_relaxed(state->trcvdctlr, drvdata->base + TRCVDCTLR);
> > > > > +   writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR);
> > > > > +   writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR);
> > > > > +
> > > > > +   for (i = 0; i < drvdata->nrseqstate; i++)
> > > > > +           writel_relaxed(state->trcseqevr[i],
> > > > > +                                   drvdata->base + TRCSEQEVRn(i));
> > > > > +
> > > > > +   writel_relaxed(state->trcseqrstevr, drvdata->base + TRCSEQRSTEVR);
> > > > > +   writel_relaxed(state->trcseqstr, drvdata->base + TRCSEQSTR);
> > > > > +   writel_relaxed(state->trcextinselr, drvdata->base + TRCEXTINSELR);
> > > > > +
> > > > > +   for (i = 0; i < drvdata->nr_cntr; i++) {
> > > > > +           writel_relaxed(state->trccntrldvr[i],
> > > > > +                                   drvdata->base + TRCCNTRLDVRn(i));
> > > > > +           writel_relaxed(state->trccntctlr[i],
> > > > > +                                   drvdata->base + TRCCNTCTLRn(i));
> > > > > +           writel_relaxed(state->trccntvr[i],
> > > > > +                                   drvdata->base + TRCCNTVRn(i));
> > > > > +   }
> > > > > +
> > > > > +   for (i = 0; i < drvdata->nr_resource * 2; i++)
> > > > > +           writel_relaxed(state->trcrsctlr[i],
> > > > > +                                   drvdata->base + TRCRSCTLRn(i));
> > > > > +
> > > > > +   for (i = 0; i < drvdata->nr_ss_cmp; i++) {
> > > > > +           writel_relaxed(state->trcssccr[i],
> > > > > +                                   drvdata->base + TRCSSCCRn(i));
> > > > > +           writel_relaxed(state->trcsscsr[i],
> > > > > +                                   drvdata->base + TRCSSCSRn(i));
> > > > > +           writel_relaxed(state->trcsspcicr[i],
> > > > > +                                   drvdata->base + TRCSSPCICRn(i));
> > > > > +   }
> > > > > +
> > > > > +   for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> > > > > +           writel_relaxed(state->trcacvr[i],
> > > > > +                                   drvdata->base + TRCACVRn(i));
> > > > > +           writel_relaxed(state->trcacatr[i],
> > > > > +                                   drvdata->base + TRCACATRn(i));
> > > > > +   }
> > > > > +
> > > > > +   for (i = 0; i < drvdata->numcidc; i++)
> > > > > +           writel_relaxed(state->trccidcvr[i],
> > > > > +                                   drvdata->base + TRCCIDCVRn(i));
> > > > > +
> > > > > +   for (i = 0; i < drvdata->numvmidc; i++)
> > > > > +           writel_relaxed(state->trcvmidcvr[i],
> > > > > +                                   drvdata->base + TRCVMIDCVRn(i));
> > > > > +
> > > > > +   writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0);
> > > > > +   writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1);
> > > > > +
> > > > > +   writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0);
> > > > > +   writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR1);
> > > > > +
> > > > > +   writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
> > > > > +
> > > > > +   /* As recommended by 4.3.7 of ARM IHI 0064D */
> > > > > +   dsb(sy);
> > > > > +   isb();
> > > > > +
> > > > > +   etm4_os_unlock(drvdata);
> > > >
> > > > Same comment as above.
> > > >
> > > > > +   CS_LOCK(drvdata->base);
> > > > > +}
> > > > > +
> > > > > +static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
> > > > > +                         void *v)
> > > > > +{
> > > > > +   struct etmv4_drvdata *drvdata = container_of(nb,
> > > > > +                                   struct etmv4_drvdata, nb);
> > > > > +
> > > > > +   if (disable_pm_save)
> > > > > +           return NOTIFY_OK;
> > > > > +
> > > > > +   switch (cmd) {
> > > > > +   case CPU_PM_ENTER:
> > > > > +           /* save the state if coresight is in use */
> > > > > +           if (coresight_is_claimed_any(drvdata->base))
> > > >
> > > > claimed_any()? At this point if coresight_is_claimed_self_hosted() == false an
> > > > external agent is competing with the framework and we should abdicate.
> > >
> > > If we only support save/restore for self-hosted, then we don't actually need
> > > to store as much state as much of it is in the etmv4_config structure.
> > >
> > > My thinking here was that if an external agent is being used and we power down
> > > then we'd also potentially suffer the same issue where state is lost. So
> > > saving/restoring may be helpful for external agents as well (or at least
> > > wouldn't do harm)...
> > 
> > There is no point in trying to cover cases where external agents are
> > involved - we will always get it wrong.
> 
> OK, given that I don't know of any particular cases where this is an issue
> I'm happy to change this to !coresight_is_claimed_self_hosted().
> 
> >  And the notifiers shouldn't
> > return immediately if a tracer is not being used.
> 
> What should they do? We only need to save/restore when there is an active
> session don't we? Have I misunderstood?

Arrggghhh.... Twitchy finger.  

And the notifier *should* return immediately if a tracer is not being used.

> 
> Thanks,
> 
> Andrew Murray
> 
> > 
> > >
> > > However I don't know if this is a real issue.
> > >
> > > >
> > > > > +                   etm4_cpu_save(drvdata);
> > > > > +           break;
> > > > > +   case CPU_PM_EXIT:
> > > > > +   case CPU_PM_ENTER_FAILED:
> > > > > +           /* trcclaimset is set when there is state to restore */
> > > > > +           if (drvdata->save_state.trcclaimset)
> > > > > +                   etm4_cpu_restore(drvdata);
> > > > > +           break;
> > > > > +   default:
> > > > > +           return NOTIFY_DONE;
> > > > > +   }
> > > > > +
> > > > > +   return NOTIFY_OK;
> > > > > +}
> > > > > +
> > > > > +static int etm4_cpu_pm_register(struct etmv4_drvdata *drvdata)
> > > > > +{
> > > > > +   drvdata->nb.notifier_call = etm4_cpu_pm_notify;
> > > > > +   return cpu_pm_register_notifier(&drvdata->nb);
> > > > > +}
> > > > > +#else
> > > > > +static int etm4_cpu_pm_register(struct etmv4_drvdata *drvdata) { return 0; }
> > > > > +#endif
> > > > > +
> > > > >  static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
> > > > >  {
> > > > >     int ret;
> > > > > @@ -1141,6 +1382,10 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
> > > > >     etm4_init_trace_id(drvdata);
> > > > >     etm4_set_default(&drvdata->config);
> > > > >
> > > > > +   ret = etm4_cpu_pm_register(drvdata);
> > > > > +   if (ret)
> > > > > +           goto err_arch_supported;
> > > > > +
> > > > >     desc.type = CORESIGHT_DEV_TYPE_SOURCE;
> > > > >     desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
> > > > >     desc.ops = &etm4_cs_ops;
> > > > > diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
> > > > > index 52786e9d8926..f4cff447c8a1 100644
> > > > > --- a/drivers/hwtracing/coresight/coresight-etm4x.h
> > > > > +++ b/drivers/hwtracing/coresight/coresight-etm4x.h
> > > > > @@ -174,7 +174,8 @@
> > > > >                                      ETM_MODE_EXCL_KERN | \
> > > > >                                      ETM_MODE_EXCL_USER)
> > > > >
> > > > > -#define TRCSTATR_IDLE_BIT          0
> > > > > +#define TRCSTATR_IDLE_BIT          BIT(0)
> > > > > +#define TRCSTATR_PMSTABLE_BIT              BIT(1)
> > > > >  #define ETM_DEFAULT_ADDR_COMP              0
> > > > >
> > > > >  /* PowerDown Control Register bits */
> > > > > @@ -281,6 +282,65 @@ struct etmv4_config {
> > > > >     u32                             ext_inp;
> > > > >  };
> > > > >
> > > > > +/**
> > > > > + * struct etm4_save_state - state to be preserved when ETM is without power
> > > > > + */
> > > > > +struct etmv4_save_state {
> > > > > +   u32     trcprgctlr;
> > > > > +   u32     trcprocselr;
> > > > > +   u32     trcconfigr;
> > > > > +   u32     trcauxctlr;
> > > > > +   u32     trceventctl0r;
> > > > > +   u32     trceventctl1r;
> > > > > +   u32     trcstallctlr;
> > > > > +   u32     trctsctlr;
> > > > > +   u32     trcsyncpr;
> > > > > +   u32     trcccctlr;
> > > > > +   u32     trcbbctlr;
> > > > > +   u32     trctraceidr;
> > > > > +   u32     trcqctlr;
> > > > > +
> > > > > +   u32     trcvictlr;
> > > > > +   u32     trcviiectlr;
> > > > > +   u32     trcvissctlr;
> > > > > +   u32     trcvipcssctlr;
> > > > > +   u32     trcvdctlr;
> > > > > +   u32     trcvdsacctlr;
> > > > > +   u32     trcvdarcctlr;
> > > > > +
> > > > > +   u32     trcseqevr[ETM_MAX_SEQ_STATES];
> > > > > +   u32     trcseqrstevr;
> > > > > +   u32     trcseqstr;
> > > > > +   u32     trcextinselr;
> > > > > +   u32     trccntrldvr[ETMv4_MAX_CNTR];
> > > > > +   u32     trccntctlr[ETMv4_MAX_CNTR];
> > > > > +   u32     trccntvr[ETMv4_MAX_CNTR];
> > > > > +
> > > > > +   u32     trcrsctlr[ETM_MAX_RES_SEL * 2];
> > > > > +
> > > > > +   u32     trcssccr[ETM_MAX_SS_CMP];
> > > > > +   u32     trcsscsr[ETM_MAX_SS_CMP];
> > > > > +   u32     trcsspcicr[ETM_MAX_SS_CMP];
> > > > > +
> > > > > +   u64     trcacvr[ETM_MAX_SINGLE_ADDR_CMP];
> > > > > +   u64     trcacatr[ETM_MAX_SINGLE_ADDR_CMP];
> > > > > +   u64     trcdvcvr[ETM_MAX_DATA_VAL_CMP];
> > > > > +   u64     trcdvcmr[ETM_MAX_DATA_VAL_CMP];
> > > > > +   u64     trccidcvr[ETMv4_MAX_CTXID_CMP];
> > > > > +   u32     trcvmidcvr[ETM_MAX_VMID_CMP];
> > > > > +   u32     trccidcctlr0;
> > > > > +   u32     trccidcctlr1;
> > > > > +   u32     trcvmidcctlr0;
> > > > > +   u32     trcvmidcctlr1;
> > > > > +
> > > > > +   u32     trcclaimset;
> > > > > +
> > > > > +   u32     cntr_val[ETMv4_MAX_CNTR];
> > > > > +   u32     seq_state;
> > > > > +   u32     vinst_ctrl;
> > > > > +   u32     ss_status[ETM_MAX_SS_CMP];
> > > > > +};
> > > > > +
> > > > >  /**
> > > > >   * struct etm4_drvdata - specifics associated to an ETM component
> > > > >   * @base:       Memory mapped base address for this component.
> > > > > @@ -337,6 +397,8 @@ struct etmv4_config {
> > > > >   * @atbtrig:       If the implementation can support ATB triggers
> > > > >   * @lpoverride:    If the implementation can support low-power state over.
> > > > >   * @config:        structure holding configuration parameters.
> > > > > + * @save_state:    State to be preserved across power loss
> > > > > + * @nb:            CPU PM notifier
> > > > >   */
> > > > >  struct etmv4_drvdata {
> > > > >     void __iomem                    *base;
> > > > > @@ -383,6 +445,8 @@ struct etmv4_drvdata {
> > > > >     bool                            atbtrig;
> > > > >     bool                            lpoverride;
> > > > >     struct etmv4_config             config;
> > > > > +   struct etmv4_save_state         save_state;
> > > > > +   struct notifier_block           nb;
> > > > >  };
> > > > >
> > > > >  /* Address comparator access types */
> > > > > diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
> > > > > index 4b130281236a..e85d09e597a0 100644
> > > > > --- a/drivers/hwtracing/coresight/coresight.c
> > > > > +++ b/drivers/hwtracing/coresight/coresight.c
> > > > > @@ -140,7 +140,7 @@ static inline bool coresight_is_claimed_self_hosted(void __iomem *base)
> > > > >     return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED;
> > > > >  }
> > > > >
> > > > > -static inline bool coresight_is_claimed_any(void __iomem *base)
> > > > > +bool coresight_is_claimed_any(void __iomem *base)
> > > > >  {
> > > > >     return coresight_read_claim_tags(base) != 0;
> > > > >  }
> > > > > diff --git a/include/linux/coresight.h b/include/linux/coresight.h
> > > > > index 62a520df8add..4f7ba923ffc4 100644
> > > > > --- a/include/linux/coresight.h
> > > > > +++ b/include/linux/coresight.h
> > > > > @@ -268,6 +268,8 @@ extern int coresight_claim_device_unlocked(void __iomem *base);
> > > > >  extern void coresight_disclaim_device(void __iomem *base);
> > > > >  extern void coresight_disclaim_device_unlocked(void __iomem *base);
> > > > >
> > > > > +extern bool coresight_is_claimed_any(void __iomem *base);
> > > > > +
> > > > >  #else
> > > > >  static inline struct coresight_device *
> > > > >  coresight_register(struct coresight_desc *desc) { return NULL; }
> > > > > @@ -290,6 +292,11 @@ static inline int coresight_claim_device(void __iomem *base)
> > > > >  static inline void coresight_disclaim_device(void __iomem *base) {}
> > > > >  static inline void coresight_disclaim_device_unlocked(void __iomem *base) {}
> > > > >
> > > > > +static inline bool coresight_is_claimed_any(void __iomem *base)
> > > > > +{
> > > > > +   return false;
> > > > > +}
> > > > > +
> > > >
> > > > I wanted to test your code but it doesn't apply on the CS next branch:
> > > >
> > > > https://git.linaro.org/kernel/coresight.git/log/?h=next
> > >
> > > Oh sorry about that, this was ontop of v5.2-rc5, I'll rebase to the CS branch
> > > on the next iteration.
> > >
> > > Thanks for the responsive feedback.
> > >
> > > Andrew Murray
> > >
> > > >
> > > > Thanks,
> > > > Mathieu
> > > >
> > > > >  #endif
> > > > >
> > > > >  #ifdef CONFIG_OF
> > > > > --
> > > > > 2.21.0
> > > > >
Sudeep Holla June 20, 2019, 3:41 p.m. UTC | #12
On Thu, Jun 20, 2019 at 12:41:17PM +0100, Andrew Murray wrote:
> On Wed, Jun 19, 2019 at 10:22:58AM -0600, Mathieu Poirier wrote:
> > On Wed, 19 Jun 2019 at 05:07, Sudeep Holla <sudeep.holla@arm.com> wrote:
> > >
> > > On Wed, Jun 19, 2019 at 11:38:12AM +0100, Suzuki K Poulose wrote:
> > > > Cc: Al Grant, Mike Leach
> > > >
> > > > Hi Sudeep,
> > > >
> > > > On 18/06/2019 14:21, Sudeep Holla wrote:
> > > > > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > > > > > Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > > > > > to hardware that power should not be removed from the trace unit.
> > > > >
> > > > > So, how or can we identify or discover such system ? DT/ACPI ?
> > > > >
> > > >
> > > > I don't think there is a mechanism at the moment to identify such
> > > > systems. But if we really need to know this information, we could
> > > > always think about it.
> > > >
> > >
> > > I prefer that as we shouldn't systems that are not broken.
> > >
> > > > > > Let's mitigate against this by saving and restoring the trace
> > > > > > unit state when the CPU enters low power states.
> > > > > >
> > > > >
> > > > > I prefer to do this conditionally. It's unnecessary on systems which
> > > > > don't ignore the TRCPDCR.PU and I really don't like them to be penalised
> > > > > while we want to add this support for *broken* systems.
> > > >
> > > > It is conditional. i.e, you may disable the operation using a kernel/module
> > > > parameter, which I think should be mentioned in the description here.
> > > >
> > >
> > > Why should the user of coresight need to know if the corresponding
> > > hardware module is broken or not. I prefer the firmware tell OS.
> >
> > I think using ACPI/DT is the best and simplest solution.
>
> I certainly agree that it feels wrong to have a default level of support
> which is targeted at broken systems. However the penalty (latency) for doing so
> doesn't seem high - seeing as this only effects users that are actively using
> coresight (I assume self hosted mode is only used as a debug tool, rather than to
> obtain metrics during normal use?).
>

Do we have numbers ? It's always helpful to have lowest latencies possible
for wakeup and adding extra notifiers will always add some latencies,
so it's not 0. We always want to reduce there notifiers and hopefully
move save/restore to hardware/firmware in future.

> Adding some broken tag in ACPI/DT seems like a good solution - assuming it will
> get adopted and used in systems. The existing "disable_pm_save" module option
> can be renamed to "enable_pm_save" for those that have less control of their
> firmware.
>
> Unless of course we think it's unlikely we'll ever see hardware that isn't
> broken - I don't have enough knowledge of how likely or not this is.
>

Sorry but even then I prefer it not to be default and force extra work
to the people who add support and constantly be reminded that it's
broken and they are deviating from default behaviour in the kernel
which may come and latency penalty.

Making it default may hide the problem if Linux is used for some validation.

Also we hardly have 3-4 platforms in upstream that support coresight,
and many are broken except Juno. But that doesn't imply all others
are broken and we just can't derive that unless we have more information.

--
Regards,
Sudeep
Mathieu Poirier June 20, 2019, 4:14 p.m. UTC | #13
On Thu, 20 Jun 2019 at 09:41, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> On Thu, Jun 20, 2019 at 12:41:17PM +0100, Andrew Murray wrote:
> > On Wed, Jun 19, 2019 at 10:22:58AM -0600, Mathieu Poirier wrote:
> > > On Wed, 19 Jun 2019 at 05:07, Sudeep Holla <sudeep.holla@arm.com> wrote:
> > > >
> > > > On Wed, Jun 19, 2019 at 11:38:12AM +0100, Suzuki K Poulose wrote:
> > > > > Cc: Al Grant, Mike Leach
> > > > >
> > > > > Hi Sudeep,
> > > > >
> > > > > On 18/06/2019 14:21, Sudeep Holla wrote:
> > > > > > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > > > > > > Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > > > > > > to hardware that power should not be removed from the trace unit.
> > > > > >
> > > > > > So, how or can we identify or discover such system ? DT/ACPI ?
> > > > > >
> > > > >
> > > > > I don't think there is a mechanism at the moment to identify such
> > > > > systems. But if we really need to know this information, we could
> > > > > always think about it.
> > > > >
> > > >
> > > > I prefer that as we shouldn't systems that are not broken.
> > > >
> > > > > > > Let's mitigate against this by saving and restoring the trace
> > > > > > > unit state when the CPU enters low power states.
> > > > > > >
> > > > > >
> > > > > > I prefer to do this conditionally. It's unnecessary on systems which
> > > > > > don't ignore the TRCPDCR.PU and I really don't like them to be penalised
> > > > > > while we want to add this support for *broken* systems.
> > > > >
> > > > > It is conditional. i.e, you may disable the operation using a kernel/module
> > > > > parameter, which I think should be mentioned in the description here.
> > > > >
> > > >
> > > > Why should the user of coresight need to know if the corresponding
> > > > hardware module is broken or not. I prefer the firmware tell OS.
> > >
> > > I think using ACPI/DT is the best and simplest solution.
> >
> > I certainly agree that it feels wrong to have a default level of support
> > which is targeted at broken systems. However the penalty (latency) for doing so
> > doesn't seem high - seeing as this only effects users that are actively using
> > coresight (I assume self hosted mode is only used as a debug tool, rather than to
> > obtain metrics during normal use?).
> >
>
> Do we have numbers ? It's always helpful to have lowest latencies possible
> for wakeup and adding extra notifiers will always add some latencies,
> so it's not 0. We always want to reduce there notifiers and hopefully
> move save/restore to hardware/firmware in future.
>
> > Adding some broken tag in ACPI/DT seems like a good solution - assuming it will
> > get adopted and used in systems. The existing "disable_pm_save" module option
> > can be renamed to "enable_pm_save" for those that have less control of their
> > firmware.
> >
> > Unless of course we think it's unlikely we'll ever see hardware that isn't
> > broken - I don't have enough knowledge of how likely or not this is.
> >
>
> Sorry but even then I prefer it not to be default and force extra work
> to the people who add support and constantly be reminded that it's
> broken and they are deviating from default behaviour in the kernel
> which may come and latency penalty.
>
> Making it default may hide the problem if Linux is used for some validation.
>
> Also we hardly have 3-4 platforms in upstream that support coresight,
> and many are broken except Juno. But that doesn't imply all others
> are broken and we just can't derive that unless we have more information.

For now we have a clear trend.  To me it is not a matter of broken vs.
non-broken but more about what people want to do or can
(realistically) do.

The coresight specification is broad and very permissive in terms of
implementation defined choices.  It is not because the TRCPDCR.PU it
not taken into account by a platform that it is automatically broken.
This could be a design choice or a trade off.  We already have two
ways of putting a CPU to sleep (architected or OS driven), we simply
do the same here for coresight.

>
> --
> Regards,
> Sudeep
Sudeep Holla June 20, 2019, 4:34 p.m. UTC | #14
On Thu, Jun 20, 2019 at 10:14:04AM -0600, Mathieu Poirier wrote:
> On Thu, 20 Jun 2019 at 09:41, Sudeep Holla <sudeep.holla@arm.com> wrote:
> >

[...]

> > Sorry but even then I prefer it not to be default and force extra work
> > to the people who add support and constantly be reminded that it's
> > broken and they are deviating from default behaviour in the kernel
> > which may come and latency penalty.
> >
> > Making it default may hide the problem if Linux is used for some validation.
> >
> > Also we hardly have 3-4 platforms in upstream that support coresight,
> > and many are broken except Juno. But that doesn't imply all others
> > are broken and we just can't derive that unless we have more information.
>
> For now we have a clear trend.  To me it is not a matter of broken vs.
> non-broken but more about what people want to do or can
> (realistically) do.
>

No disagreement there.

> The coresight specification is broad and very permissive in terms of
> implementation defined choices.  It is not because the TRCPDCR.PU it
> not taken into account by a platform that it is automatically broken.
> This could be a design choice or a trade off.  We already have two
> ways of putting a CPU to sleep (architected or OS driven), we simply
> do the same here for coresight.
>

Sure, if the term "broken" is inappropriate I am fine if anything else
is used. The point is we are adding an idle notifier that adds latency
and must be done if and only if necessary.

How you identify that and implement doesn't bother me much, making
that default just based on the fact that more platforms need it
compared to others definitely does. So I am fine if this needs to be
advertised *not broken* but *by design*, sure go for it.

My main concern was additional latency that this introduces for platforms
that don't require.

--
Regards,
Sudeep
Mathieu Poirier June 20, 2019, 4:47 p.m. UTC | #15
On Thu, 20 Jun 2019 at 10:34, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> On Thu, Jun 20, 2019 at 10:14:04AM -0600, Mathieu Poirier wrote:
> > On Thu, 20 Jun 2019 at 09:41, Sudeep Holla <sudeep.holla@arm.com> wrote:
> > >
>
> [...]
>
> > > Sorry but even then I prefer it not to be default and force extra work
> > > to the people who add support and constantly be reminded that it's
> > > broken and they are deviating from default behaviour in the kernel
> > > which may come and latency penalty.
> > >
> > > Making it default may hide the problem if Linux is used for some validation.
> > >
> > > Also we hardly have 3-4 platforms in upstream that support coresight,
> > > and many are broken except Juno. But that doesn't imply all others
> > > are broken and we just can't derive that unless we have more information.
> >
> > For now we have a clear trend.  To me it is not a matter of broken vs.
> > non-broken but more about what people want to do or can
> > (realistically) do.
> >
>
> No disagreement there.
>
> > The coresight specification is broad and very permissive in terms of
> > implementation defined choices.  It is not because the TRCPDCR.PU it
> > not taken into account by a platform that it is automatically broken.
> > This could be a design choice or a trade off.  We already have two
> > ways of putting a CPU to sleep (architected or OS driven), we simply
> > do the same here for coresight.
> >
>
> Sure, if the term "broken" is inappropriate I am fine if anything else
> is used. The point is we are adding an idle notifier that adds latency
> and must be done if and only if necessary.
>
> How you identify that and implement doesn't bother me much, making
> that default just based on the fact that more platforms need it
> compared to others definitely does. So I am fine if this needs to be
> advertised *not broken* but *by design*, sure go for it.
>

Then all we have to do is make the ACPI/DT property that indicate the
method used to deal with tracer idling mandatory.  That way people are
conscious of the choice they are making.  To be backward compatible
with current systems we default to the TRCPDCR.PU method but print a
warning message, just like we do for obsolete DT bindings.

> My main concern was additional latency that this introduces for platforms
> that don't require.
>
> --
> Regards,
> Sudeep
Sudeep Holla June 20, 2019, 4:48 p.m. UTC | #16
On Wed, Jun 19, 2019 at 11:38:12AM +0100, Suzuki K Poulose wrote:

[...]

> >
> > I prefer to do this conditionally. It's unnecessary on systems which
> > don't ignore the TRCPDCR.PU and I really don't like them to be penalised
> > while we want to add this support for *broken* systems.
>
> It is conditional. i.e, you may disable the operation using a kernel/module
> parameter, which I think should be mentioned in the description here.
>

Thanks Suzuki for pointing this out. I completely missed this when I
read this email earlier. I am fine with that approach as long as the
default is flipped. Platforms must enable the feature and by enabling
save/restore they implicitly understand the latency impacts for idle.

--
Regards,
Sudeep
Sudeep Holla June 20, 2019, 4:52 p.m. UTC | #17
On Thu, Jun 20, 2019 at 10:47:38AM -0600, Mathieu Poirier wrote:
> On Thu, 20 Jun 2019 at 10:34, Sudeep Holla <sudeep.holla@arm.com> wrote:
> >
> > On Thu, Jun 20, 2019 at 10:14:04AM -0600, Mathieu Poirier wrote:
> > > On Thu, 20 Jun 2019 at 09:41, Sudeep Holla <sudeep.holla@arm.com> wrote:
> > > >
> >
> > [...]
> >
> > > > Sorry but even then I prefer it not to be default and force extra work
> > > > to the people who add support and constantly be reminded that it's
> > > > broken and they are deviating from default behaviour in the kernel
> > > > which may come and latency penalty.
> > > >
> > > > Making it default may hide the problem if Linux is used for some validation.
> > > >
> > > > Also we hardly have 3-4 platforms in upstream that support coresight,
> > > > and many are broken except Juno. But that doesn't imply all others
> > > > are broken and we just can't derive that unless we have more information.
> > >
> > > For now we have a clear trend.  To me it is not a matter of broken vs.
> > > non-broken but more about what people want to do or can
> > > (realistically) do.
> > >
> >
> > No disagreement there.
> >
> > > The coresight specification is broad and very permissive in terms of
> > > implementation defined choices.  It is not because the TRCPDCR.PU it
> > > not taken into account by a platform that it is automatically broken.
> > > This could be a design choice or a trade off.  We already have two
> > > ways of putting a CPU to sleep (architected or OS driven), we simply
> > > do the same here for coresight.
> > >
> >
> > Sure, if the term "broken" is inappropriate I am fine if anything else
> > is used. The point is we are adding an idle notifier that adds latency
> > and must be done if and only if necessary.
> >
> > How you identify that and implement doesn't bother me much, making
> > that default just based on the fact that more platforms need it
> > compared to others definitely does. So I am fine if this needs to be
> > advertised *not broken* but *by design*, sure go for it.
> >
>
> Then all we have to do is make the ACPI/DT property that indicate the
> method used to deal with tracer idling mandatory.  That way people are
> conscious of the choice they are making.  To be backward compatible
> with current systems we default to the TRCPDCR.PU method but print a
> warning message, just like we do for obsolete DT bindings.
>

If you are happy with kernel module/command line parameters, I am fine
by that too. I missed that earlier, sorry for the noise. But I still
need to keep the default disabled and platforms needing it must enable
it. If not architectural argument, I am still concerned with latency :)

--
Regards,
Sudeep
Andrew Murray June 20, 2019, 4:54 p.m. UTC | #18
On Thu, Jun 20, 2019 at 10:47:38AM -0600, Mathieu Poirier wrote:
> On Thu, 20 Jun 2019 at 10:34, Sudeep Holla <sudeep.holla@arm.com> wrote:
> >
> > On Thu, Jun 20, 2019 at 10:14:04AM -0600, Mathieu Poirier wrote:
> > > On Thu, 20 Jun 2019 at 09:41, Sudeep Holla <sudeep.holla@arm.com> wrote:
> > > >
> >
> > [...]
> >
> > > > Sorry but even then I prefer it not to be default and force extra work
> > > > to the people who add support and constantly be reminded that it's
> > > > broken and they are deviating from default behaviour in the kernel
> > > > which may come and latency penalty.
> > > >
> > > > Making it default may hide the problem if Linux is used for some validation.
> > > >
> > > > Also we hardly have 3-4 platforms in upstream that support coresight,
> > > > and many are broken except Juno. But that doesn't imply all others
> > > > are broken and we just can't derive that unless we have more information.
> > >
> > > For now we have a clear trend.  To me it is not a matter of broken vs.
> > > non-broken but more about what people want to do or can
> > > (realistically) do.
> > >
> >
> > No disagreement there.
> >
> > > The coresight specification is broad and very permissive in terms of
> > > implementation defined choices.  It is not because the TRCPDCR.PU it
> > > not taken into account by a platform that it is automatically broken.
> > > This could be a design choice or a trade off.  We already have two
> > > ways of putting a CPU to sleep (architected or OS driven), we simply
> > > do the same here for coresight.
> > >
> >
> > Sure, if the term "broken" is inappropriate I am fine if anything else
> > is used. The point is we are adding an idle notifier that adds latency
> > and must be done if and only if necessary.
> >
> > How you identify that and implement doesn't bother me much, making
> > that default just based on the fact that more platforms need it
> > compared to others definitely does. So I am fine if this needs to be
> > advertised *not broken* but *by design*, sure go for it.
> >
> 
> Then all we have to do is make the ACPI/DT property that indicate the
> method used to deal with tracer idling mandatory.  That way people are
> conscious of the choice they are making.  To be backward compatible
> with current systems we default to the TRCPDCR.PU method but print a
> warning message, just like we do for obsolete DT bindings.

I'll respin the series based on this approach. I'll also flip the
'disable_pm_save' module option to 'enable_pm_save' - thus allowing any
one to use software save/restore if they wish.

Thanks for the feedback everyone.

Thanks,

Andrew Murray

> 
> > My main concern was additional latency that this introduces for platforms
> > that don't require.
> >
> > --
> > Regards,
> > Sudeep
Suzuki K Poulose June 20, 2019, 5 p.m. UTC | #19
On 20/06/2019 17:54, Andrew Murray wrote:
> On Thu, Jun 20, 2019 at 10:47:38AM -0600, Mathieu Poirier wrote:
>> On Thu, 20 Jun 2019 at 10:34, Sudeep Holla <sudeep.holla@arm.com> wrote:
>>>
>>> On Thu, Jun 20, 2019 at 10:14:04AM -0600, Mathieu Poirier wrote:
>>>> On Thu, 20 Jun 2019 at 09:41, Sudeep Holla <sudeep.holla@arm.com> wrote:

...

>> Then all we have to do is make the ACPI/DT property that indicate the
>> method used to deal with tracer idling mandatory.  That way people are
>> conscious of the choice they are making.  To be backward compatible
>> with current systems we default to the TRCPDCR.PU method but print a
>> warning message, just like we do for obsolete DT bindings.
> 
> I'll respin the series based on this approach. I'll also flip the
> 'disable_pm_save' module option to 'enable_pm_save' - thus allowing any
> one to use software save/restore if they wish.

If you are going to add a firmware property, please get a consensus on the
name here, before respinning to avoid another churn :-). How about one of :

"arm,coresight-etm-looses-state"
"arm,coresight-etm-needs-save-restore"

or something better long the line.

Cheers
Suzuki
Mathieu Poirier June 20, 2019, 5:10 p.m. UTC | #20
On Thu, 20 Jun 2019 at 11:00, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
>
>
>
> On 20/06/2019 17:54, Andrew Murray wrote:
> > On Thu, Jun 20, 2019 at 10:47:38AM -0600, Mathieu Poirier wrote:
> >> On Thu, 20 Jun 2019 at 10:34, Sudeep Holla <sudeep.holla@arm.com> wrote:
> >>>
> >>> On Thu, Jun 20, 2019 at 10:14:04AM -0600, Mathieu Poirier wrote:
> >>>> On Thu, 20 Jun 2019 at 09:41, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> ...
>
> >> Then all we have to do is make the ACPI/DT property that indicate the
> >> method used to deal with tracer idling mandatory.  That way people are
> >> conscious of the choice they are making.  To be backward compatible
> >> with current systems we default to the TRCPDCR.PU method but print a
> >> warning message, just like we do for obsolete DT bindings.
> >
> > I'll respin the series based on this approach. I'll also flip the
> > 'disable_pm_save' module option to 'enable_pm_save' - thus allowing any
> > one to use software save/restore if they wish.
>
> If you are going to add a firmware property, please get a consensus on the
> name here, before respinning to avoid another churn :-). How about one of :
>
> "arm,coresight-etm-looses-state"
> "arm,coresight-etm-needs-save-restore"

"arm,coresight-needs-save-restore"

That way we can also use it for CTI.

>
> or something better long the line.
>
> Cheers
> Suzuki
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Sudeep Holla June 20, 2019, 5:11 p.m. UTC | #21
On Thu, Jun 20, 2019 at 06:00:48PM +0100, Suzuki K Poulose wrote:
>
>
> On 20/06/2019 17:54, Andrew Murray wrote:
> > On Thu, Jun 20, 2019 at 10:47:38AM -0600, Mathieu Poirier wrote:
> > > On Thu, 20 Jun 2019 at 10:34, Sudeep Holla <sudeep.holla@arm.com> wrote:
> > > >
> > > > On Thu, Jun 20, 2019 at 10:14:04AM -0600, Mathieu Poirier wrote:
> > > > > On Thu, 20 Jun 2019 at 09:41, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> ...
>
> > > Then all we have to do is make the ACPI/DT property that indicate the
> > > method used to deal with tracer idling mandatory.  That way people are
> > > conscious of the choice they are making.  To be backward compatible
> > > with current systems we default to the TRCPDCR.PU method but print a
> > > warning message, just like we do for obsolete DT bindings.
> >
> > I'll respin the series based on this approach. I'll also flip the
> > 'disable_pm_save' module option to 'enable_pm_save' - thus allowing any
> > one to use software save/restore if they wish.
>
> If you are going to add a firmware property, please get a consensus on the
> name here, before respinning to avoid another churn :-). How about one of :
>
> "arm,coresight-etm-looses-state"
> "arm,coresight-etm-needs-save-restore"
>

Just to be more clear, I am fine with just kernel command/module parameter
approach and DT bindings may not be required. If at all it is decided to
take DT approach, then you really don't need command/module parameter IMO.
I will leave that to you and Mathieu, wanted to make sure I am not
contributing to the confusion yet again.

--
Regards,
Sudeep
Mathieu Poirier June 20, 2019, 6 p.m. UTC | #22
On Thu, 20 Jun 2019 at 11:11, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> On Thu, Jun 20, 2019 at 06:00:48PM +0100, Suzuki K Poulose wrote:
> >
> >
> > On 20/06/2019 17:54, Andrew Murray wrote:
> > > On Thu, Jun 20, 2019 at 10:47:38AM -0600, Mathieu Poirier wrote:
> > > > On Thu, 20 Jun 2019 at 10:34, Sudeep Holla <sudeep.holla@arm.com> wrote:
> > > > >
> > > > > On Thu, Jun 20, 2019 at 10:14:04AM -0600, Mathieu Poirier wrote:
> > > > > > On Thu, 20 Jun 2019 at 09:41, Sudeep Holla <sudeep.holla@arm.com> wrote:
> >
> > ...
> >
> > > > Then all we have to do is make the ACPI/DT property that indicate the
> > > > method used to deal with tracer idling mandatory.  That way people are
> > > > conscious of the choice they are making.  To be backward compatible
> > > > with current systems we default to the TRCPDCR.PU method but print a
> > > > warning message, just like we do for obsolete DT bindings.
> > >
> > > I'll respin the series based on this approach. I'll also flip the
> > > 'disable_pm_save' module option to 'enable_pm_save' - thus allowing any
> > > one to use software save/restore if they wish.
> >
> > If you are going to add a firmware property, please get a consensus on the
> > name here, before respinning to avoid another churn :-). How about one of :
> >
> > "arm,coresight-etm-looses-state"
> > "arm,coresight-etm-needs-save-restore"
> >
>
> Just to be more clear, I am fine with just kernel command/module parameter
> approach and DT bindings may not be required. If at all it is decided to
> take DT approach, then you really don't need command/module parameter IMO.
> I will leave that to you and Mathieu, wanted to make sure I am not
> contributing to the confusion yet again.

Both DT/ACPI and module parameters are needed.

The module parameter is there to overwrite the default setting for
debugging purposes when using the notifiers method and assumes cores
have been prevented from idling - otherwise the system will crash.
The DT/ACPI property is needed to tell coresight which method to use,
i.e architected of notifiers, to avoid latency issues when possible.

Thanks,
Mathieu

>
> --
> Regards,
> Sudeep
Andrew Murray June 21, 2019, 9:29 a.m. UTC | #23
On Thu, Jun 20, 2019 at 11:10:26AM -0600, Mathieu Poirier wrote:
> On Thu, 20 Jun 2019 at 11:00, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
> >
> >
> >
> > On 20/06/2019 17:54, Andrew Murray wrote:
> > > On Thu, Jun 20, 2019 at 10:47:38AM -0600, Mathieu Poirier wrote:
> > >> On Thu, 20 Jun 2019 at 10:34, Sudeep Holla <sudeep.holla@arm.com> wrote:
> > >>>
> > >>> On Thu, Jun 20, 2019 at 10:14:04AM -0600, Mathieu Poirier wrote:
> > >>>> On Thu, 20 Jun 2019 at 09:41, Sudeep Holla <sudeep.holla@arm.com> wrote:
> >
> > ...
> >
> > >> Then all we have to do is make the ACPI/DT property that indicate the
> > >> method used to deal with tracer idling mandatory.  That way people are
> > >> conscious of the choice they are making.  To be backward compatible
> > >> with current systems we default to the TRCPDCR.PU method but print a
> > >> warning message, just like we do for obsolete DT bindings.
> > >
> > > I'll respin the series based on this approach. I'll also flip the
> > > 'disable_pm_save' module option to 'enable_pm_save' - thus allowing any
> > > one to use software save/restore if they wish.
> >
> > If you are going to add a firmware property, please get a consensus on the
> > name here, before respinning to avoid another churn :-). How about one of :
> >
> > "arm,coresight-etm-looses-state"
> > "arm,coresight-etm-needs-save-restore"
> 
> "arm,coresight-needs-save-restore"
> 
> That way we can also use it for CTI.

I have no objections with "arm,coresight-needs-save-restore", however in
trying to come up with a name that describes the hardware rather than
something software needs to do, I came up with:

 "arm,coresight-broken-pu"
 "arm,coresight-no-pu"
 "arm,coresight-pu-ignored"

Are any of these prefered?

Thanks,

Andrew Murray

> 
> >
> > or something better long the line.
> >
> > Cheers
> > Suzuki
> >
> > _______________________________________________
> > linux-arm-kernel mailing list
> > linux-arm-kernel@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Mathieu Poirier June 21, 2019, 3:30 p.m. UTC | #24
On Fri, 21 Jun 2019 at 03:29, Andrew Murray <andrew.murray@arm.com> wrote:
>
> On Thu, Jun 20, 2019 at 11:10:26AM -0600, Mathieu Poirier wrote:
> > On Thu, 20 Jun 2019 at 11:00, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
> > >
> > >
> > >
> > > On 20/06/2019 17:54, Andrew Murray wrote:
> > > > On Thu, Jun 20, 2019 at 10:47:38AM -0600, Mathieu Poirier wrote:
> > > >> On Thu, 20 Jun 2019 at 10:34, Sudeep Holla <sudeep.holla@arm.com> wrote:
> > > >>>
> > > >>> On Thu, Jun 20, 2019 at 10:14:04AM -0600, Mathieu Poirier wrote:
> > > >>>> On Thu, 20 Jun 2019 at 09:41, Sudeep Holla <sudeep.holla@arm.com> wrote:
> > >
> > > ...
> > >
> > > >> Then all we have to do is make the ACPI/DT property that indicate the
> > > >> method used to deal with tracer idling mandatory.  That way people are
> > > >> conscious of the choice they are making.  To be backward compatible
> > > >> with current systems we default to the TRCPDCR.PU method but print a
> > > >> warning message, just like we do for obsolete DT bindings.
> > > >
> > > > I'll respin the series based on this approach. I'll also flip the
> > > > 'disable_pm_save' module option to 'enable_pm_save' - thus allowing any
> > > > one to use software save/restore if they wish.
> > >
> > > If you are going to add a firmware property, please get a consensus on the
> > > name here, before respinning to avoid another churn :-). How about one of :
> > >
> > > "arm,coresight-etm-looses-state"
> > > "arm,coresight-etm-needs-save-restore"
> >
> > "arm,coresight-needs-save-restore"
> >
> > That way we can also use it for CTI.
>
> I have no objections with "arm,coresight-needs-save-restore", however in
> trying to come up with a name that describes the hardware rather than
> something software needs to do, I came up with:
>
>  "arm,coresight-broken-pu"
>  "arm,coresight-no-pu"
>  "arm,coresight-pu-ignored"
>
> Are any of these prefered?

None of the above because 1) the case we are dealing with as been
inaccurately labeled as "broken" and 2) anything referring to "pu" is
exclusively related to ETMs.

>
> Thanks,
>
> Andrew Murray
>
> >
> > >
> > > or something better long the line.
> > >
> > > Cheers
> > > Suzuki
> > >
> > > _______________________________________________
> > > linux-arm-kernel mailing list
> > > linux-arm-kernel@lists.infradead.org
> > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Suzuki K Poulose June 25, 2019, 10:07 a.m. UTC | #25
Hi Mathieu,

On 18/06/2019 23:55, Mathieu Poirier wrote:
> On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
>> Some hardware will ignore bit TRCPDCR.PU which is used to signal
>> to hardware that power should not be removed from the trace unit.
>> Let's mitigate against this by saving and restoring the trace
>> unit state when the CPU enters low power states.
>>
>> To provide the benefit to both self-hosted and external debuggers
>> we save/restore the entire state which includes etmv4_config data
>> and dynamic data such as inflight counter values, sequencer
>> states, etc.
>>
>> To reduce CPU suspend/resume latency the state is only saved or
>> restored if coresight is in use as determined by the claimset
>> registers.
>>
>> To aid debug of CPU suspend/resume a disable_pm_save parameter
>> is provided to disable this feature.
>>
>> Signed-off-by: Andrew Murray <andrew.murray@arm.com>


>> +static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
>> +			      void *v)
>> +{
>> +	struct etmv4_drvdata *drvdata = container_of(nb,
>> +					struct etmv4_drvdata, nb);
>> +
>> +	if (disable_pm_save)
>> +		return NOTIFY_OK;
>> +
>> +	switch (cmd) {
>> +	case CPU_PM_ENTER:
>> +		/* save the state if coresight is in use */
>> +		if (coresight_is_claimed_any(drvdata->base))
> 
> claimed_any()? At this point if coresight_is_claimed_self_hosted() == false an
> external agent is competing with the framework and we should abdicate.

I think claimed_any() is correct check. As per PSCI, ARM DEN 0022D, section
6.8.1 Debug and Trace save and restore,  the OS software is
in charge of save/restoring the context of Debug/Trace. The claim tags
are a mechanism to indicate who is consuming the components. Also, given
the OS software doesn't have a reliable way to communicate back to the
the External debugger about its decision to power down the CPU, that
makes sense to save/restore it.

Cheers
Suzuki
Mathieu Poirier June 25, 2019, 7:57 p.m. UTC | #26
Hi,

On Tue, 25 Jun 2019 at 04:07, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
>
> Hi Mathieu,
>
> On 18/06/2019 23:55, Mathieu Poirier wrote:
> > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> >> Some hardware will ignore bit TRCPDCR.PU which is used to signal
> >> to hardware that power should not be removed from the trace unit.
> >> Let's mitigate against this by saving and restoring the trace
> >> unit state when the CPU enters low power states.
> >>
> >> To provide the benefit to both self-hosted and external debuggers
> >> we save/restore the entire state which includes etmv4_config data
> >> and dynamic data such as inflight counter values, sequencer
> >> states, etc.
> >>
> >> To reduce CPU suspend/resume latency the state is only saved or
> >> restored if coresight is in use as determined by the claimset
> >> registers.
> >>
> >> To aid debug of CPU suspend/resume a disable_pm_save parameter
> >> is provided to disable this feature.
> >>
> >> Signed-off-by: Andrew Murray <andrew.murray@arm.com>
>
>
> >> +static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
> >> +                          void *v)
> >> +{
> >> +    struct etmv4_drvdata *drvdata = container_of(nb,
> >> +                                    struct etmv4_drvdata, nb);
> >> +
> >> +    if (disable_pm_save)
> >> +            return NOTIFY_OK;
> >> +
> >> +    switch (cmd) {
> >> +    case CPU_PM_ENTER:
> >> +            /* save the state if coresight is in use */
> >> +            if (coresight_is_claimed_any(drvdata->base))
> >
> > claimed_any()? At this point if coresight_is_claimed_self_hosted() == false an
> > external agent is competing with the framework and we should abdicate.
>
> I think claimed_any() is correct check. As per PSCI, ARM DEN 0022D, section
> 6.8.1 Debug and Trace save and restore,  the OS software is
> in charge of save/restoring the context of Debug/Trace. The claim tags
> are a mechanism to indicate who is consuming the components. Also, given
> the OS software doesn't have a reliable way to communicate back to the
> the External debugger about its decision to power down the CPU, that
> makes sense to save/restore it.

What I understand from section 6.8.1 is that supervisory and OS power
management SW are responsible to save the debug context when operating
in their respective mode, which reflects my comment above.

I also see that two options are available to an external agent, i.e
either use the DBGNOPWRDWN and DBGPWRUPREQ bits to request powerdown
emulation or use the "OS Unlock Catch" debug event (which probably
relates to the lost of context bit) to restore the debug context.
From where I stand there is no provision for OS power management code
to take care of the debug context of an external agent.  Am I missing
something here?

>
> Cheers
> Suzuki
Mike Leach June 26, 2019, 10:21 a.m. UTC | #27
Hi,

Sorry, a bit late on this set as it didn't appear in the Coresight
mailing list as expected per suzukis suggestion.

On Tue, 25 Jun 2019 at 20:57, Mathieu Poirier
<mathieu.poirier@linaro.org> wrote:
>
> Hi,
>
> On Tue, 25 Jun 2019 at 04:07, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
> >
> > Hi Mathieu,
> >
> > On 18/06/2019 23:55, Mathieu Poirier wrote:
> > > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > >> Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > >> to hardware that power should not be removed from the trace unit.
> > >> Let's mitigate against this by saving and restoring the trace
> > >> unit state when the CPU enters low power states.
> > >>
> > >> To provide the benefit to both self-hosted and external debuggers
> > >> we save/restore the entire state which includes etmv4_config data
> > >> and dynamic data such as inflight counter values, sequencer
> > >> states, etc.
> > >>
> > >> To reduce CPU suspend/resume latency the state is only saved or
> > >> restored if coresight is in use as determined by the claimset
> > >> registers.
> > >>
> > >> To aid debug of CPU suspend/resume a disable_pm_save parameter
> > >> is provided to disable this feature.
> > >>
> > >> Signed-off-by: Andrew Murray <andrew.murray@arm.com>
> >
> >
> > >> +static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
> > >> +                          void *v)
> > >> +{
> > >> +    struct etmv4_drvdata *drvdata = container_of(nb,
> > >> +                                    struct etmv4_drvdata, nb);
> > >> +
> > >> +    if (disable_pm_save)
> > >> +            return NOTIFY_OK;
> > >> +
> > >> +    switch (cmd) {
> > >> +    case CPU_PM_ENTER:
> > >> +            /* save the state if coresight is in use */
> > >> +            if (coresight_is_claimed_any(drvdata->base))
> > >
> > > claimed_any()? At this point if coresight_is_claimed_self_hosted() == false an
> > > external agent is competing with the framework and we should abdicate.
> >
> > I think claimed_any() is correct check. As per PSCI, ARM DEN 0022D, section
> > 6.8.1 Debug and Trace save and restore,  the OS software is
> > in charge of save/restoring the context of Debug/Trace. The claim tags
> > are a mechanism to indicate who is consuming the components. Also, given
> > the OS software doesn't have a reliable way to communicate back to the
> > the External debugger about its decision to power down the CPU, that
> > makes sense to save/restore it.
>
> What I understand from section 6.8.1 is that supervisory and OS power
> management SW are responsible to save the debug context when operating
> in their respective mode, which reflects my comment above.
>
> I also see that two options are available to an external agent, i.e
> either use the DBGNOPWRDWN and DBGPWRUPREQ bits to request powerdown
> emulation or use the "OS Unlock Catch" debug event (which probably
> relates to the lost of context bit) to restore the debug context.
> From where I stand there is no provision for OS power management code
> to take care of the debug context of an external agent.  Am I missing
> something here?
>

OS lock is precisely the provision designed for an OS to handle
save/restore on behalf of an external debug agent. OS lock blocks the
external debugger from accessing the coresight when it is powered but
being updated by the OS

A scenario may be:-
a) external debug halts core(s) & programs Coresight subsystem -
likely extracting trace via TPIU.
b) external debug agent restarts cores - linux (continues) running /
booting - collecting the trace we want.
c) Some event happens and the external debug agent regains control.
(breakpoint / halt request).

During b) cores may be powering up and down. When this happens we need
the state to be saved and restored so that trace continues. (assuming
that the various debug power requests above are either not supported
in the fw/hardware or not asserted by the external agent).
The external debug agent cannot safely manipulate coresight during
this period - it can never know if a register is going to be available
- a classic race condition.

Irrespective of whoever "owns" the ETM programming - if the CPUidle
notification is required due to implementation issues, then in both
cases the save and restore is required.

For the external agent owner I agree that everything needs to be saved
 - but for self hosted, just the dynamic values should be read back,
much of the remainder of the information is already held in the driver
in etmv4_config. This should help reduce at least the power down
latency.




Regards

Mike


> >
> > Cheers
> > Suzuki
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Mathieu Poirier June 26, 2019, 4:57 p.m. UTC | #28
On Wed, 26 Jun 2019 at 04:21, Mike Leach <mike.leach@linaro.org> wrote:
>
> Hi,
>
> Sorry, a bit late on this set as it didn't appear in the Coresight
> mailing list as expected per suzukis suggestion.
>
> On Tue, 25 Jun 2019 at 20:57, Mathieu Poirier
> <mathieu.poirier@linaro.org> wrote:
> >
> > Hi,
> >
> > On Tue, 25 Jun 2019 at 04:07, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
> > >
> > > Hi Mathieu,
> > >
> > > On 18/06/2019 23:55, Mathieu Poirier wrote:
> > > > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > > >> Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > > >> to hardware that power should not be removed from the trace unit.
> > > >> Let's mitigate against this by saving and restoring the trace
> > > >> unit state when the CPU enters low power states.
> > > >>
> > > >> To provide the benefit to both self-hosted and external debuggers
> > > >> we save/restore the entire state which includes etmv4_config data
> > > >> and dynamic data such as inflight counter values, sequencer
> > > >> states, etc.
> > > >>
> > > >> To reduce CPU suspend/resume latency the state is only saved or
> > > >> restored if coresight is in use as determined by the claimset
> > > >> registers.
> > > >>
> > > >> To aid debug of CPU suspend/resume a disable_pm_save parameter
> > > >> is provided to disable this feature.
> > > >>
> > > >> Signed-off-by: Andrew Murray <andrew.murray@arm.com>
> > >
> > >
> > > >> +static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
> > > >> +                          void *v)
> > > >> +{
> > > >> +    struct etmv4_drvdata *drvdata = container_of(nb,
> > > >> +                                    struct etmv4_drvdata, nb);
> > > >> +
> > > >> +    if (disable_pm_save)
> > > >> +            return NOTIFY_OK;
> > > >> +
> > > >> +    switch (cmd) {
> > > >> +    case CPU_PM_ENTER:
> > > >> +            /* save the state if coresight is in use */
> > > >> +            if (coresight_is_claimed_any(drvdata->base))
> > > >
> > > > claimed_any()? At this point if coresight_is_claimed_self_hosted() == false an
> > > > external agent is competing with the framework and we should abdicate.
> > >
> > > I think claimed_any() is correct check. As per PSCI, ARM DEN 0022D, section
> > > 6.8.1 Debug and Trace save and restore,  the OS software is
> > > in charge of save/restoring the context of Debug/Trace. The claim tags
> > > are a mechanism to indicate who is consuming the components. Also, given
> > > the OS software doesn't have a reliable way to communicate back to the
> > > the External debugger about its decision to power down the CPU, that
> > > makes sense to save/restore it.
> >
> > What I understand from section 6.8.1 is that supervisory and OS power
> > management SW are responsible to save the debug context when operating
> > in their respective mode, which reflects my comment above.
> >
> > I also see that two options are available to an external agent, i.e
> > either use the DBGNOPWRDWN and DBGPWRUPREQ bits to request powerdown
> > emulation or use the "OS Unlock Catch" debug event (which probably
> > relates to the lost of context bit) to restore the debug context.
> > From where I stand there is no provision for OS power management code
> > to take care of the debug context of an external agent.  Am I missing
> > something here?
> >
>
> OS lock is precisely the provision designed for an OS to handle
> save/restore on behalf of an external debug agent. OS lock blocks the
> external debugger from accessing the coresight when it is powered but
> being updated by the OS
>
> A scenario may be:-
> a) external debug halts core(s) & programs Coresight subsystem -
> likely extracting trace via TPIU.
> b) external debug agent restarts cores - linux (continues) running /
> booting - collecting the trace we want.
> c) Some event happens and the external debug agent regains control.
> (breakpoint / halt request).
>
> During b) cores may be powering up and down. When this happens we need
> the state to be saved and restored so that trace continues. (assuming
> that the various debug power requests above are either not supported
> in the fw/hardware or not asserted by the external agent).
> The external debug agent cannot safely manipulate coresight during
> this period - it can never know if a register is going to be available
> - a classic race condition.
>
> Irrespective of whoever "owns" the ETM programming - if the CPUidle
> notification is required due to implementation issues, then in both
> cases the save and restore is required.
>
> For the external agent owner I agree that everything needs to be saved
>  - but for self hosted, just the dynamic values should be read back,
> much of the remainder of the information is already held in the driver
> in etmv4_config. This should help reduce at least the power down
> latency.
>
>

Many thanks for shedding light on the expectations of external agents.

Andrew, from Mike's explanation the original implementation of
checking any of the claimtag bits to trigger a save/restore was valid.
It also means that contrary to one of my previous comment, context
save restore should always be performed regardless of whether the
framework is being used for self hosted debugging or not.

Thanks,
Mathieu

>
>
> Regards
>
> Mike
>
>
> > >
> > > Cheers
> > > Suzuki
> >
> > _______________________________________________
> > linux-arm-kernel mailing list
> > linux-arm-kernel@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
>
>
> --
> Mike Leach
> Principal Engineer, ARM Ltd.
> Manchester Design Centre. UK
Andrew Murray June 27, 2019, 8:12 a.m. UTC | #29
On Wed, Jun 26, 2019 at 10:57:17AM -0600, Mathieu Poirier wrote:
> On Wed, 26 Jun 2019 at 04:21, Mike Leach <mike.leach@linaro.org> wrote:
> >
> > Hi,
> >
> > Sorry, a bit late on this set as it didn't appear in the Coresight
> > mailing list as expected per suzukis suggestion.
> >
> > On Tue, 25 Jun 2019 at 20:57, Mathieu Poirier
> > <mathieu.poirier@linaro.org> wrote:
> > >
> > > Hi,
> > >
> > > On Tue, 25 Jun 2019 at 04:07, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
> > > >
> > > > Hi Mathieu,
> > > >
> > > > On 18/06/2019 23:55, Mathieu Poirier wrote:
> > > > > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > > > >> Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > > > >> to hardware that power should not be removed from the trace unit.
> > > > >> Let's mitigate against this by saving and restoring the trace
> > > > >> unit state when the CPU enters low power states.
> > > > >>
> > > > >> To provide the benefit to both self-hosted and external debuggers
> > > > >> we save/restore the entire state which includes etmv4_config data
> > > > >> and dynamic data such as inflight counter values, sequencer
> > > > >> states, etc.
> > > > >>
> > > > >> To reduce CPU suspend/resume latency the state is only saved or
> > > > >> restored if coresight is in use as determined by the claimset
> > > > >> registers.
> > > > >>
> > > > >> To aid debug of CPU suspend/resume a disable_pm_save parameter
> > > > >> is provided to disable this feature.
> > > > >>
> > > > >> Signed-off-by: Andrew Murray <andrew.murray@arm.com>
> > > >
> > > >
> > > > >> +static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
> > > > >> +                          void *v)
> > > > >> +{
> > > > >> +    struct etmv4_drvdata *drvdata = container_of(nb,
> > > > >> +                                    struct etmv4_drvdata, nb);
> > > > >> +
> > > > >> +    if (disable_pm_save)
> > > > >> +            return NOTIFY_OK;
> > > > >> +
> > > > >> +    switch (cmd) {
> > > > >> +    case CPU_PM_ENTER:
> > > > >> +            /* save the state if coresight is in use */
> > > > >> +            if (coresight_is_claimed_any(drvdata->base))
> > > > >
> > > > > claimed_any()? At this point if coresight_is_claimed_self_hosted() == false an
> > > > > external agent is competing with the framework and we should abdicate.
> > > >
> > > > I think claimed_any() is correct check. As per PSCI, ARM DEN 0022D, section
> > > > 6.8.1 Debug and Trace save and restore,  the OS software is
> > > > in charge of save/restoring the context of Debug/Trace. The claim tags
> > > > are a mechanism to indicate who is consuming the components. Also, given
> > > > the OS software doesn't have a reliable way to communicate back to the
> > > > the External debugger about its decision to power down the CPU, that
> > > > makes sense to save/restore it.
> > >
> > > What I understand from section 6.8.1 is that supervisory and OS power
> > > management SW are responsible to save the debug context when operating
> > > in their respective mode, which reflects my comment above.
> > >
> > > I also see that two options are available to an external agent, i.e
> > > either use the DBGNOPWRDWN and DBGPWRUPREQ bits to request powerdown
> > > emulation or use the "OS Unlock Catch" debug event (which probably
> > > relates to the lost of context bit) to restore the debug context.
> > > From where I stand there is no provision for OS power management code
> > > to take care of the debug context of an external agent.  Am I missing
> > > something here?
> > >
> >
> > OS lock is precisely the provision designed for an OS to handle
> > save/restore on behalf of an external debug agent. OS lock blocks the
> > external debugger from accessing the coresight when it is powered but
> > being updated by the OS
> >
> > A scenario may be:-
> > a) external debug halts core(s) & programs Coresight subsystem -
> > likely extracting trace via TPIU.
> > b) external debug agent restarts cores - linux (continues) running /
> > booting - collecting the trace we want.
> > c) Some event happens and the external debug agent regains control.
> > (breakpoint / halt request).
> >
> > During b) cores may be powering up and down. When this happens we need
> > the state to be saved and restored so that trace continues. (assuming
> > that the various debug power requests above are either not supported
> > in the fw/hardware or not asserted by the external agent).
> > The external debug agent cannot safely manipulate coresight during
> > this period - it can never know if a register is going to be available
> > - a classic race condition.
> >
> > Irrespective of whoever "owns" the ETM programming - if the CPUidle
> > notification is required due to implementation issues, then in both
> > cases the save and restore is required.
> >
> > For the external agent owner I agree that everything needs to be saved
> >  - but for self hosted, just the dynamic values should be read back,
> > much of the remainder of the information is already held in the driver
> > in etmv4_config. This should help reduce at least the power down
> > latency.
> >
> >
> 
> Many thanks for shedding light on the expectations of external agents.

Indeed, thanks for the helpful feedback.

> 
> Andrew, from Mike's explanation the original implementation of
> checking any of the claimtag bits to trigger a save/restore was valid.
> It also means that contrary to one of my previous comment, context
> save restore should always be performed regardless of whether the
> framework is being used for self hosted debugging or not.

Thanks for clarifying this for me. I'll share a v2 shortly.

Thanks,

Andrew Murray

> 
> Thanks,
> Mathieu
> 
> >
> >
> > Regards
> >
> > Mike
> >
> >
> > > >
> > > > Cheers
> > > > Suzuki
> > >
> > > _______________________________________________
> > > linux-arm-kernel mailing list
> > > linux-arm-kernel@lists.infradead.org
> > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> >
> >
> >
> > --
> > Mike Leach
> > Principal Engineer, ARM Ltd.
> > Manchester Design Centre. UK
Andrew Murray June 27, 2019, 8:17 a.m. UTC | #30
On Wed, Jun 26, 2019 at 11:21:35AM +0100, Mike Leach wrote:
> Hi,
> 
> Sorry, a bit late on this set as it didn't appear in the Coresight
> mailing list as expected per suzukis suggestion.
> 
> On Tue, 25 Jun 2019 at 20:57, Mathieu Poirier
> <mathieu.poirier@linaro.org> wrote:
> >
> > Hi,
> >
> > On Tue, 25 Jun 2019 at 04:07, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
> > >
> > > Hi Mathieu,
> > >
> > > On 18/06/2019 23:55, Mathieu Poirier wrote:
> > > > On Tue, Jun 18, 2019 at 01:54:33PM +0100, Andrew Murray wrote:
> > > >> Some hardware will ignore bit TRCPDCR.PU which is used to signal
> > > >> to hardware that power should not be removed from the trace unit.
> > > >> Let's mitigate against this by saving and restoring the trace
> > > >> unit state when the CPU enters low power states.
> > > >>
> > > >> To provide the benefit to both self-hosted and external debuggers
> > > >> we save/restore the entire state which includes etmv4_config data
> > > >> and dynamic data such as inflight counter values, sequencer
> > > >> states, etc.
> > > >>
> > > >> To reduce CPU suspend/resume latency the state is only saved or
> > > >> restored if coresight is in use as determined by the claimset
> > > >> registers.
> > > >>
> > > >> To aid debug of CPU suspend/resume a disable_pm_save parameter
> > > >> is provided to disable this feature.
> > > >>
> > > >> Signed-off-by: Andrew Murray <andrew.murray@arm.com>
> > >
> > >
> > > >> +static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
> > > >> +                          void *v)
> > > >> +{
> > > >> +    struct etmv4_drvdata *drvdata = container_of(nb,
> > > >> +                                    struct etmv4_drvdata, nb);
> > > >> +
> > > >> +    if (disable_pm_save)
> > > >> +            return NOTIFY_OK;
> > > >> +
> > > >> +    switch (cmd) {
> > > >> +    case CPU_PM_ENTER:
> > > >> +            /* save the state if coresight is in use */
> > > >> +            if (coresight_is_claimed_any(drvdata->base))
> > > >
> > > > claimed_any()? At this point if coresight_is_claimed_self_hosted() == false an
> > > > external agent is competing with the framework and we should abdicate.
> > >
> > > I think claimed_any() is correct check. As per PSCI, ARM DEN 0022D, section
> > > 6.8.1 Debug and Trace save and restore,  the OS software is
> > > in charge of save/restoring the context of Debug/Trace. The claim tags
> > > are a mechanism to indicate who is consuming the components. Also, given
> > > the OS software doesn't have a reliable way to communicate back to the
> > > the External debugger about its decision to power down the CPU, that
> > > makes sense to save/restore it.
> >
> > What I understand from section 6.8.1 is that supervisory and OS power
> > management SW are responsible to save the debug context when operating
> > in their respective mode, which reflects my comment above.
> >
> > I also see that two options are available to an external agent, i.e
> > either use the DBGNOPWRDWN and DBGPWRUPREQ bits to request powerdown
> > emulation or use the "OS Unlock Catch" debug event (which probably
> > relates to the lost of context bit) to restore the debug context.
> > From where I stand there is no provision for OS power management code
> > to take care of the debug context of an external agent.  Am I missing
> > something here?
> >
> 
> OS lock is precisely the provision designed for an OS to handle
> save/restore on behalf of an external debug agent. OS lock blocks the
> external debugger from accessing the coresight when it is powered but
> being updated by the OS
> 
> A scenario may be:-
> a) external debug halts core(s) & programs Coresight subsystem -
> likely extracting trace via TPIU.
> b) external debug agent restarts cores - linux (continues) running /
> booting - collecting the trace we want.
> c) Some event happens and the external debug agent regains control.
> (breakpoint / halt request).
> 
> During b) cores may be powering up and down. When this happens we need
> the state to be saved and restored so that trace continues. (assuming
> that the various debug power requests above are either not supported
> in the fw/hardware or not asserted by the external agent).
> The external debug agent cannot safely manipulate coresight during
> this period - it can never know if a register is going to be available
> - a classic race condition.
> 
> Irrespective of whoever "owns" the ETM programming - if the CPUidle
> notification is required due to implementation issues, then in both
> cases the save and restore is required.

Thanks for this.

> 
> For the external agent owner I agree that everything needs to be saved
>  - but for self hosted, just the dynamic values should be read back,
> much of the remainder of the information is already held in the driver
> in etmv4_config. This should help reduce at least the power down
> latency.

I agree - we don't need to save/restore as many registers as we currently
do for self-hosted - and thus we have the opportunity to optimise latency
for this use case.

However, let's see where v2 takes us and measure the impact before adding
this optimisation. I'm keen to avoid complexity where possible.

Thanks,

Andrew Murray

> 
> 
> 
> 
> Regards
> 
> Mike
> 
> 
> > >
> > > Cheers
> > > Suzuki
> >
> > _______________________________________________
> > linux-arm-kernel mailing list
> > linux-arm-kernel@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 
> 
> 
> -- 
> Mike Leach
> Principal Engineer, ARM Ltd.
> Manchester Design Centre. UK
diff mbox series

Patch

diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
index bda90d4cd62b..d27c5e0d9aec 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -18,6 +18,7 @@ 
 #include <linux/stat.h>
 #include <linux/clk.h>
 #include <linux/cpu.h>
+#include <linux/cpu_pm.h>
 #include <linux/coresight.h>
 #include <linux/coresight-pmu.h>
 #include <linux/pm_wakeup.h>
@@ -36,6 +37,9 @@ 
 static int boot_enable;
 module_param_named(boot_enable, boot_enable, int, 0444);
 
+static int disable_pm_save;
+module_param_named(disable_pm_save, disable_pm_save, int, 0444);
+
 /* The number of ETMv4 currently registered */
 static int etm4_count;
 static struct etmv4_drvdata *etmdrvdata[NR_CPUS];
@@ -53,6 +57,14 @@  static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
 	isb();
 }
 
+static void etm4_os_lock(struct etmv4_drvdata *drvdata)
+{
+	/* Writing 0x1 to TRCOSLAR unlocks the trace registers */
+	writel_relaxed(0x1, drvdata->base + TRCOSLAR);
+	drvdata->os_unlock = false;
+	isb();
+}
+
 static bool etm4_arch_supported(u8 arch)
 {
 	/* Mask out the minor version number */
@@ -1076,6 +1088,235 @@  static void etm4_init_trace_id(struct etmv4_drvdata *drvdata)
 	drvdata->trcid = coresight_get_trace_id(drvdata->cpu);
 }
 
+#ifdef CONFIG_CPU_PM
+static void etm4_cpu_save(struct etmv4_drvdata *drvdata)
+{
+	int i;
+	u32 control;
+	struct etmv4_save_state *state;
+
+	/* As recommended by 3.4.1 of ARM IHI 0064D */
+	dsb(sy);
+	isb();
+
+	CS_UNLOCK(drvdata->base);
+	etm4_os_lock(drvdata);
+
+	/* wait for TRCSTATR.PMSTABLE to go up */
+	if (coresight_timeout(drvdata->base, TRCSTATR,
+					TRCSTATR_PMSTABLE_BIT, 1))
+		dev_err(drvdata->dev,
+			"timeout while waiting for Idle Trace Status\n");
+
+	state = &drvdata->save_state;
+
+	state->trcprgctlr = readl(drvdata->base + TRCPRGCTLR);
+	state->trcprocselr = readl(drvdata->base + TRCPROCSELR);
+	state->trcconfigr = readl(drvdata->base + TRCCONFIGR);
+	state->trcauxctlr = readl(drvdata->base + TRCAUXCTLR);
+	state->trceventctl0r = readl(drvdata->base + TRCEVENTCTL0R);
+	state->trceventctl1r = readl(drvdata->base + TRCEVENTCTL1R);
+	state->trcstallctlr = readl(drvdata->base + TRCSTALLCTLR);
+	state->trctsctlr = readl(drvdata->base + TRCTSCTLR);
+	state->trcsyncpr = readl(drvdata->base + TRCSYNCPR);
+	state->trcccctlr = readl(drvdata->base + TRCCCCTLR);
+	state->trcbbctlr = readl(drvdata->base + TRCBBCTLR);
+	state->trctraceidr = readl(drvdata->base + TRCTRACEIDR);
+	state->trcqctlr = readl(drvdata->base + TRCQCTLR);
+
+	state->trcvictlr = readl(drvdata->base + TRCVICTLR);
+	state->trcviiectlr = readl(drvdata->base + TRCVIIECTLR);
+	state->trcvissctlr = readl(drvdata->base + TRCVISSCTLR);
+	state->trcvipcssctlr = readl(drvdata->base + TRCVIPCSSCTLR);
+	state->trcvdctlr = readl(drvdata->base + TRCVDCTLR);
+	state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR);
+	state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR);
+
+	for (i = 0; i < drvdata->nrseqstate; i++)
+		state->trcseqevr[i] = readl(drvdata->base + TRCSEQEVRn(i));
+
+	state->trcseqrstevr = readl(drvdata->base + TRCSEQRSTEVR);
+	state->trcseqstr = readl(drvdata->base + TRCSEQSTR);
+	state->trcextinselr = readl(drvdata->base + TRCEXTINSELR);
+
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		state->trccntrldvr[i] = readl(drvdata->base + TRCCNTRLDVRn(i));
+		state->trccntctlr[i] = readl(drvdata->base + TRCCNTCTLRn(i));
+		state->trccntvr[i] = readl(drvdata->base + TRCCNTVRn(i));
+	}
+
+	for (i = 0; i < drvdata->nr_resource * 2; i++)
+		state->trcrsctlr[i] = readl(drvdata->base + TRCRSCTLRn(i));
+
+	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
+		state->trcssccr[i] = readl(drvdata->base + TRCSSCCRn(i));
+		state->trcsscsr[i] = readl(drvdata->base + TRCSSCSRn(i));
+		state->trcsspcicr[i] = readl(drvdata->base + TRCSSPCICRn(i));
+	}
+
+	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
+		state->trcacvr[i] = readl(drvdata->base + TRCACVRn(i));
+		state->trcacatr[i] = readl(drvdata->base + TRCACATRn(i));
+	}
+
+	for (i = 0; i < drvdata->numcidc; i++)
+		state->trccidcvr[i] = readl(drvdata->base + TRCCIDCVRn(i));
+
+	for (i = 0; i < drvdata->numvmidc; i++)
+		state->trcvmidcvr[i] = readl(drvdata->base + TRCVMIDCVRn(i));
+
+	state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0);
+	state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1);
+
+	state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0);
+	state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR1);
+
+	state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR);
+
+	/* wait for TRCSTATR.IDLE to go up */
+	if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
+		dev_err(drvdata->dev,
+			"timeout while waiting for Idle Trace Status\n");
+
+	/* power can be removed from the trace unit now */
+	control = readl_relaxed(drvdata->base + TRCPDCR);
+	control &= ~TRCPDCR_PU;
+	writel_relaxed(control, drvdata->base + TRCPDCR);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
+{
+	int i;
+	struct etmv4_save_state *state;
+
+	state = &drvdata->save_state;
+
+	CS_UNLOCK(drvdata->base);
+
+	writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
+
+	writel_relaxed(state->trcprgctlr, drvdata->base + TRCPRGCTLR);
+	writel_relaxed(state->trcprocselr, drvdata->base + TRCPROCSELR);
+	writel_relaxed(state->trcconfigr, drvdata->base + TRCCONFIGR);
+	writel_relaxed(state->trcauxctlr, drvdata->base + TRCAUXCTLR);
+	writel_relaxed(state->trceventctl0r, drvdata->base + TRCEVENTCTL0R);
+	writel_relaxed(state->trceventctl1r, drvdata->base + TRCEVENTCTL1R);
+	writel_relaxed(state->trcstallctlr, drvdata->base + TRCSTALLCTLR);
+	writel_relaxed(state->trctsctlr, drvdata->base + TRCTSCTLR);
+	writel_relaxed(state->trcsyncpr, drvdata->base + TRCSYNCPR);
+	writel_relaxed(state->trcccctlr, drvdata->base + TRCCCCTLR);
+	writel_relaxed(state->trcbbctlr, drvdata->base + TRCBBCTLR);
+	writel_relaxed(state->trctraceidr, drvdata->base + TRCTRACEIDR);
+	writel_relaxed(state->trcqctlr, drvdata->base + TRCQCTLR);
+
+	writel_relaxed(state->trcvictlr, drvdata->base + TRCVICTLR);
+	writel_relaxed(state->trcviiectlr, drvdata->base + TRCVIIECTLR);
+	writel_relaxed(state->trcvissctlr, drvdata->base + TRCVISSCTLR);
+	writel_relaxed(state->trcvipcssctlr, drvdata->base + TRCVIPCSSCTLR);
+	writel_relaxed(state->trcvdctlr, drvdata->base + TRCVDCTLR);
+	writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR);
+	writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR);
+
+	for (i = 0; i < drvdata->nrseqstate; i++)
+		writel_relaxed(state->trcseqevr[i],
+					drvdata->base + TRCSEQEVRn(i));
+
+	writel_relaxed(state->trcseqrstevr, drvdata->base + TRCSEQRSTEVR);
+	writel_relaxed(state->trcseqstr, drvdata->base + TRCSEQSTR);
+	writel_relaxed(state->trcextinselr, drvdata->base + TRCEXTINSELR);
+
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		writel_relaxed(state->trccntrldvr[i],
+					drvdata->base + TRCCNTRLDVRn(i));
+		writel_relaxed(state->trccntctlr[i],
+					drvdata->base + TRCCNTCTLRn(i));
+		writel_relaxed(state->trccntvr[i],
+					drvdata->base + TRCCNTVRn(i));
+	}
+
+	for (i = 0; i < drvdata->nr_resource * 2; i++)
+		writel_relaxed(state->trcrsctlr[i],
+					drvdata->base + TRCRSCTLRn(i));
+
+	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
+		writel_relaxed(state->trcssccr[i],
+					drvdata->base + TRCSSCCRn(i));
+		writel_relaxed(state->trcsscsr[i],
+					drvdata->base + TRCSSCSRn(i));
+		writel_relaxed(state->trcsspcicr[i],
+					drvdata->base + TRCSSPCICRn(i));
+	}
+
+	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
+		writel_relaxed(state->trcacvr[i],
+					drvdata->base + TRCACVRn(i));
+		writel_relaxed(state->trcacatr[i],
+					drvdata->base + TRCACATRn(i));
+	}
+
+	for (i = 0; i < drvdata->numcidc; i++)
+		writel_relaxed(state->trccidcvr[i],
+					drvdata->base + TRCCIDCVRn(i));
+
+	for (i = 0; i < drvdata->numvmidc; i++)
+		writel_relaxed(state->trcvmidcvr[i],
+					drvdata->base + TRCVMIDCVRn(i));
+
+	writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0);
+	writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1);
+
+	writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0);
+	writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR1);
+
+	writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
+
+	/* As recommended by 4.3.7 of ARM IHI 0064D */
+	dsb(sy);
+	isb();
+
+	etm4_os_unlock(drvdata);
+	CS_LOCK(drvdata->base);
+}
+
+static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
+			      void *v)
+{
+	struct etmv4_drvdata *drvdata = container_of(nb,
+					struct etmv4_drvdata, nb);
+
+	if (disable_pm_save)
+		return NOTIFY_OK;
+
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		/* save the state if coresight is in use */
+		if (coresight_is_claimed_any(drvdata->base))
+			etm4_cpu_save(drvdata);
+		break;
+	case CPU_PM_EXIT:
+	case CPU_PM_ENTER_FAILED:
+		/* trcclaimset is set when there is state to restore */
+		if (drvdata->save_state.trcclaimset)
+			etm4_cpu_restore(drvdata);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int etm4_cpu_pm_register(struct etmv4_drvdata *drvdata)
+{
+	drvdata->nb.notifier_call = etm4_cpu_pm_notify;
+	return cpu_pm_register_notifier(&drvdata->nb);
+}
+#else
+static int etm4_cpu_pm_register(struct etmv4_drvdata *drvdata) { return 0; }
+#endif
+
 static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 {
 	int ret;
@@ -1141,6 +1382,10 @@  static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 	etm4_init_trace_id(drvdata);
 	etm4_set_default(&drvdata->config);
 
+	ret = etm4_cpu_pm_register(drvdata);
+	if (ret)
+		goto err_arch_supported;
+
 	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
 	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
 	desc.ops = &etm4_cs_ops;
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
index 52786e9d8926..f4cff447c8a1 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -174,7 +174,8 @@ 
 					 ETM_MODE_EXCL_KERN | \
 					 ETM_MODE_EXCL_USER)
 
-#define TRCSTATR_IDLE_BIT		0
+#define TRCSTATR_IDLE_BIT		BIT(0)
+#define TRCSTATR_PMSTABLE_BIT		BIT(1)
 #define ETM_DEFAULT_ADDR_COMP		0
 
 /* PowerDown Control Register bits */
@@ -281,6 +282,65 @@  struct etmv4_config {
 	u32				ext_inp;
 };
 
+/**
+ * struct etm4_save_state - state to be preserved when ETM is without power
+ */
+struct etmv4_save_state {
+	u32	trcprgctlr;
+	u32	trcprocselr;
+	u32	trcconfigr;
+	u32	trcauxctlr;
+	u32	trceventctl0r;
+	u32	trceventctl1r;
+	u32	trcstallctlr;
+	u32	trctsctlr;
+	u32	trcsyncpr;
+	u32	trcccctlr;
+	u32	trcbbctlr;
+	u32	trctraceidr;
+	u32	trcqctlr;
+
+	u32	trcvictlr;
+	u32	trcviiectlr;
+	u32	trcvissctlr;
+	u32	trcvipcssctlr;
+	u32	trcvdctlr;
+	u32	trcvdsacctlr;
+	u32	trcvdarcctlr;
+
+	u32	trcseqevr[ETM_MAX_SEQ_STATES];
+	u32	trcseqrstevr;
+	u32	trcseqstr;
+	u32	trcextinselr;
+	u32	trccntrldvr[ETMv4_MAX_CNTR];
+	u32	trccntctlr[ETMv4_MAX_CNTR];
+	u32	trccntvr[ETMv4_MAX_CNTR];
+
+	u32	trcrsctlr[ETM_MAX_RES_SEL * 2];
+
+	u32	trcssccr[ETM_MAX_SS_CMP];
+	u32	trcsscsr[ETM_MAX_SS_CMP];
+	u32	trcsspcicr[ETM_MAX_SS_CMP];
+
+	u64	trcacvr[ETM_MAX_SINGLE_ADDR_CMP];
+	u64	trcacatr[ETM_MAX_SINGLE_ADDR_CMP];
+	u64	trcdvcvr[ETM_MAX_DATA_VAL_CMP];
+	u64	trcdvcmr[ETM_MAX_DATA_VAL_CMP];
+	u64	trccidcvr[ETMv4_MAX_CTXID_CMP];
+	u32	trcvmidcvr[ETM_MAX_VMID_CMP];
+	u32	trccidcctlr0;
+	u32	trccidcctlr1;
+	u32	trcvmidcctlr0;
+	u32	trcvmidcctlr1;
+
+	u32	trcclaimset;
+
+	u32	cntr_val[ETMv4_MAX_CNTR];
+	u32	seq_state;
+	u32	vinst_ctrl;
+	u32	ss_status[ETM_MAX_SS_CMP];
+};
+
 /**
  * struct etm4_drvdata - specifics associated to an ETM component
  * @base:       Memory mapped base address for this component.
@@ -337,6 +397,8 @@  struct etmv4_config {
  * @atbtrig:	If the implementation can support ATB triggers
  * @lpoverride:	If the implementation can support low-power state over.
  * @config:	structure holding configuration parameters.
+ * @save_state:	State to be preserved across power loss
+ * @nb:		CPU PM notifier
  */
 struct etmv4_drvdata {
 	void __iomem			*base;
@@ -383,6 +445,8 @@  struct etmv4_drvdata {
 	bool				atbtrig;
 	bool				lpoverride;
 	struct etmv4_config		config;
+	struct etmv4_save_state		save_state;
+	struct notifier_block		nb;
 };
 
 /* Address comparator access types */
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 4b130281236a..e85d09e597a0 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -140,7 +140,7 @@  static inline bool coresight_is_claimed_self_hosted(void __iomem *base)
 	return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED;
 }
 
-static inline bool coresight_is_claimed_any(void __iomem *base)
+bool coresight_is_claimed_any(void __iomem *base)
 {
 	return coresight_read_claim_tags(base) != 0;
 }
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 62a520df8add..4f7ba923ffc4 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -268,6 +268,8 @@  extern int coresight_claim_device_unlocked(void __iomem *base);
 extern void coresight_disclaim_device(void __iomem *base);
 extern void coresight_disclaim_device_unlocked(void __iomem *base);
 
+extern bool coresight_is_claimed_any(void __iomem *base);
+
 #else
 static inline struct coresight_device *
 coresight_register(struct coresight_desc *desc) { return NULL; }
@@ -290,6 +292,11 @@  static inline int coresight_claim_device(void __iomem *base)
 static inline void coresight_disclaim_device(void __iomem *base) {}
 static inline void coresight_disclaim_device_unlocked(void __iomem *base) {}
 
+static inline bool coresight_is_claimed_any(void __iomem *base)
+{
+	return false;
+}
+
 #endif
 
 #ifdef CONFIG_OF