diff mbox

[v2,1/2] ARM64: arch_timer: Work around QorIQ Erratum A-008585

Message ID 1463114260-8724-1-git-send-email-oss@buserror.net (mailing list archive)
State New, archived
Headers show

Commit Message

Crystal Wood May 13, 2016, 4:37 a.m. UTC
Erratum A-008585 says that the ARM generic timer counter "has the
potential to contain an erroneous value for a small number of core
clock cycles every time the timer value changes".  Accesses to TVAL
(both read and write) are also affected due to the implicit counter
read.  Accesses to CVAL are not affected.

The workaround is to reread TVAL and count registers until successive reads
return the same value, and when writing TVAL to retry until counter
reads before and after the write return the same value.

This erratum can be found on LS1043A and LS2080A.

Signed-off-by: Scott Wood <oss@buserror.net>
---
v2:
Significant rework based on feedback, including using static_key,
disabling VDSO counter access rather than adding the workaround to the
VDSO, and uninlining the loops.

Dropped the separate property for indicating that writes to TVAL are
affected, as I believe that's just a side effect of the implicit
counter read being corrupted, and thus a chip that is affected by one
will always be affected by the other.

Dropped the arm32 portion as it seems there was confusion about whether
LS1021A is affected.  Currently I am being told that it is not
affected.

I considered writing to CVAL rather than looping on TVAL writes, but
that would still have required separate set_next_event() code for the
erratum, and adding CVAL to the enum would have required a bunch of
extra handlers in switch statements (even where unused, due to compiler
warnings about unhandled enum values) including in an arm32 header.  It
seemed better to avoid the arm32 interaction and new untested
accessors.
---
 .../devicetree/bindings/arm/arch_timer.txt         |   6 ++
 arch/arm64/include/asm/arch_timer.h                |  37 +++++--
 drivers/clocksource/arm_arch_timer.c               | 110 +++++++++++++++++++++
 3 files changed, 144 insertions(+), 9 deletions(-)

Comments

Marc Zyngier May 13, 2016, 10:24 a.m. UTC | #1
On Thu, 12 May 2016 23:37:39 -0500
Scott Wood <oss@buserror.net> wrote:

Hi Scott,

> Erratum A-008585 says that the ARM generic timer counter "has the
> potential to contain an erroneous value for a small number of core
> clock cycles every time the timer value changes".  Accesses to TVAL
> (both read and write) are also affected due to the implicit counter
> read.  Accesses to CVAL are not affected.
> 
> The workaround is to reread TVAL and count registers until successive reads
> return the same value, and when writing TVAL to retry until counter
> reads before and after the write return the same value.
> 
> This erratum can be found on LS1043A and LS2080A.
> 
> Signed-off-by: Scott Wood <oss@buserror.net>
> ---
> v2:
> Significant rework based on feedback, including using static_key,
> disabling VDSO counter access rather than adding the workaround to the
> VDSO, and uninlining the loops.
> 
> Dropped the separate property for indicating that writes to TVAL are
> affected, as I believe that's just a side effect of the implicit
> counter read being corrupted, and thus a chip that is affected by one
> will always be affected by the other.
> 
> Dropped the arm32 portion as it seems there was confusion about whether
> LS1021A is affected.  Currently I am being told that it is not
> affected.
> 
> I considered writing to CVAL rather than looping on TVAL writes, but
> that would still have required separate set_next_event() code for the
> erratum, and adding CVAL to the enum would have required a bunch of
> extra handlers in switch statements (even where unused, due to compiler
> warnings about unhandled enum values) including in an arm32 header.  It
> seemed better to avoid the arm32 interaction and new untested
> accessors.
> ---
>  .../devicetree/bindings/arm/arch_timer.txt         |   6 ++
>  arch/arm64/include/asm/arch_timer.h                |  37 +++++--
>  drivers/clocksource/arm_arch_timer.c               | 110 +++++++++++++++++++++
>  3 files changed, 144 insertions(+), 9 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
> index e774128..ef5fbe9 100644
> --- a/Documentation/devicetree/bindings/arm/arch_timer.txt
> +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
> @@ -25,6 +25,12 @@ to deliver its interrupts via SPIs.
>  - always-on : a boolean property. If present, the timer is powered through an
>    always-on power domain, therefore it never loses context.
>  
> +- fsl,erratum-a008585 : A boolean property. Indicates the presence of
> +  QorIQ erratum A-008585, which says that reading the counter is
> +  unreliable unless the same value is returned by back-to-back reads.
> +  This also affects writes to the tval register, due to the implicit
> +  counter read.
> +
>  ** Optional properties:
>  
>  - arm,cpu-registers-not-fw-configured : Firmware does not initialize

This should be part of a separate patch. Also, errata should be
documented in Documentation/arm64/silicon-errata.txt.

> diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
> index fbe0ca3..9715e85 100644
> --- a/arch/arm64/include/asm/arch_timer.h
> +++ b/arch/arm64/include/asm/arch_timer.h
> @@ -23,10 +23,33 @@
>  
>  #include <linux/bug.h>
>  #include <linux/init.h>
> +#include <linux/jump_label.h>
>  #include <linux/types.h>
>  
>  #include <clocksource/arm_arch_timer.h>
>  
> +extern struct static_key_false arch_timer_read_ool_enabled;
> +
> +#define ARCH_TIMER_REG_READ(reg, func) \
> +extern u64 func##_ool(void); \
> +static inline u64 __##func(void) \
> +{ \
> +	u64 val; \
> +	asm volatile("mrs %0, " reg : "=r" (val)); \
> +	return val; \
> +} \
> +static inline u64 _##func(void) \
> +{ \
> +	if (static_branch_unlikely(&arch_timer_read_ool_enabled)) \
> +		return func##_ool(); \
> +	else \
> +		return __##func(); \
> +}
> +
> +ARCH_TIMER_REG_READ("cntp_tval_el0", arch_timer_get_ptval)
> +ARCH_TIMER_REG_READ("cntv_tval_el0", arch_timer_get_vtval)
> +ARCH_TIMER_REG_READ("cntvct_el0", arch_counter_get_cntvct)
> +

Given that this will have a (small) impact on non-affected platforms,
it'd be good to have this guarded by a erratum-specific config option
(CONFIG_FSL_ERRATUM_008585?) and turn it into trivial accessors when not
defined.

>  /*
>   * These register accessors are marked inline so the compiler can
>   * nicely work out which register we want, and chuck away the rest of
> @@ -66,19 +89,19 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
>  	if (access == ARCH_TIMER_PHYS_ACCESS) {
>  		switch (reg) {
>  		case ARCH_TIMER_REG_CTRL:
> -			asm volatile("mrs %0,  cntp_ctl_el0" : "=r" (val));
> +			asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val));

Spurious change?

>  			break;
>  		case ARCH_TIMER_REG_TVAL:
> -			asm volatile("mrs %0, cntp_tval_el0" : "=r" (val));
> +			val = _arch_timer_get_ptval();
>  			break;
>  		}
>  	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
>  		switch (reg) {
>  		case ARCH_TIMER_REG_CTRL:
> -			asm volatile("mrs %0,  cntv_ctl_el0" : "=r" (val));
> +			asm volatile("mrs %0, cntv_ctl_el0" : "=r" (val));

Same here.

>  			break;
>  		case ARCH_TIMER_REG_TVAL:
> -			asm volatile("mrs %0, cntv_tval_el0" : "=r" (val));
> +			val = _arch_timer_get_vtval();
>  			break;
>  		}
>  	}
> @@ -116,12 +139,8 @@ static inline u64 arch_counter_get_cntpct(void)
>  
>  static inline u64 arch_counter_get_cntvct(void)
>  {
> -	u64 cval;
> -
>  	isb();
> -	asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
> -
> -	return cval;
> +	return _arch_counter_get_cntvct();
>  }
>  
>  static inline int arch_timer_arch_init(void)
> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> index 5152b38..6f78831 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -79,10 +79,52 @@ static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI;
>  static bool arch_timer_c3stop;
>  static bool arch_timer_mem_use_virtual;
>  
> +DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled);
> +EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled);
> +

Once you have a config option, this can move to the guarded section
below...

>  /*
>   * Architected system timer support.
>   */
>  
> +#ifdef CONFIG_ARM64

which can become CONFIG_FSL_ERRATUM_008585

> +/*
> + * __always_inline is used to ensure that func() is not an actual function
> + * pointer, which would result in the register accesses potentially being too
> + * far apart for the loop to work.
> + */
> +static __always_inline u64 arch_timer_reread(u64 (*func)(void))

This is quite a generic function name. I'd expect something that refers
to the erratum number.

> +{
> +	u64 cval_old, cval_new;
> +	int timeout = 200;

Can we have a comment on how this value has been chosen? 

> +
> +	do {
> +		isb();
> +		cval_old = func();
> +		cval_new = func();
> +		timeout--;
> +	} while (cval_old != cval_new && timeout);
> +
> +	WARN_ON_ONCE(!timeout);
> +	return cval_new;
> +}
> +
> +u64 arch_counter_get_cntvct_ool(void)
> +{
> +	return arch_timer_reread(__arch_counter_get_cntvct);
> +}
> +
> +u64 arch_timer_get_vtval_ool(void)
> +{
> +	return arch_timer_reread(__arch_timer_get_vtval);
> +}
> +
> +u64 arch_timer_get_ptval_ool(void)
> +{
> +	return arch_timer_reread(__arch_timer_get_ptval);
> +}
> +
> +#endif /* ARM64 */
> +
>  static __always_inline
>  void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val,
>  			  struct clock_event_device *clk)
> @@ -232,6 +274,50 @@ static __always_inline void set_next_event(const int access, unsigned long evt,
>  	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
>  }
>  
> +#ifdef CONFIG_ARM64
> +static __always_inline void rewrite_tval(const int access,
> +		unsigned long evt, struct clock_event_device *clk)
> +{
> +	u64 cval_old, cval_new;
> +	int timeout = 200;
> +
> +	do {
> +		cval_old = __arch_counter_get_cntvct();
> +		arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt, clk);
> +		cval_new = __arch_counter_get_cntvct();

Don't you need to guarantee the order of accesses here?

> +		timeout--;
> +	} while (cval_old != cval_new && timeout);
> +
> +	WARN_ON_ONCE(!timeout);
> +}
> +
> +static __always_inline void set_next_event_errata(const int access,
> +		unsigned long evt, struct clock_event_device *clk)
> +{
> +	unsigned long ctrl;
> +
> +	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
> +	ctrl |= ARCH_TIMER_CTRL_ENABLE;
> +	ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
> +	rewrite_tval(access, evt, clk);
> +	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
> +}
> +
> +static int arch_timer_set_next_event_virt_errata(unsigned long evt,
> +						 struct clock_event_device *clk)
> +{
> +	set_next_event_errata(ARCH_TIMER_VIRT_ACCESS, evt, clk);
> +	return 0;
> +}
> +
> +static int arch_timer_set_next_event_phys_errata(unsigned long evt,
> +						 struct clock_event_device *clk)
> +{
> +	set_next_event_errata(ARCH_TIMER_PHYS_ACCESS, evt, clk);
> +	return 0;
> +}

Same comment about the function names.

> +#endif /* ARM64 */
> +
>  static int arch_timer_set_next_event_virt(unsigned long evt,
>  					  struct clock_event_device *clk)
>  {
> @@ -277,6 +363,13 @@ static void __arch_timer_setup(unsigned type,
>  			clk->set_state_shutdown = arch_timer_shutdown_virt;
>  			clk->set_state_oneshot_stopped = arch_timer_shutdown_virt;
>  			clk->set_next_event = arch_timer_set_next_event_virt;
> +
> +#ifdef CONFIG_ARM64
> +			if (static_branch_unlikely(&arch_timer_read_ool_enabled))
> +				clk->set_next_event =
> +					arch_timer_set_next_event_virt_errata;

On the same line, please. You could also rewrite this as a helper
function that would populate the set_next_event field in all cases.

> +#endif
> +
>  			break;
>  		case PHYS_SECURE_PPI:
>  		case PHYS_NONSECURE_PPI:
> @@ -284,6 +377,13 @@ static void __arch_timer_setup(unsigned type,
>  			clk->set_state_shutdown = arch_timer_shutdown_phys;
>  			clk->set_state_oneshot_stopped = arch_timer_shutdown_phys;
>  			clk->set_next_event = arch_timer_set_next_event_phys;
> +
> +#ifdef CONFIG_ARM64
> +			if (static_branch_unlikely(&arch_timer_read_ool_enabled))
> +				clk->set_next_event =
> +					arch_timer_set_next_event_phys_errata;

Same here.

> +#endif
> +
>  			break;
>  		default:
>  			BUG();
> @@ -485,6 +585,13 @@ static void __init arch_counter_register(unsigned type)
>  			arch_timer_read_counter = arch_counter_get_cntvct;
>  		else
>  			arch_timer_read_counter = arch_counter_get_cntpct;
> +
> +		/*
> +		 * Don't use the vdso fastpath if errata require using
> +		 * the out-of-line counter accessor.
> +		 */
> +		if (static_branch_unlikely(&arch_timer_read_ool_enabled))
> +			clocksource_counter.name = "arch_sys_counter_ool";

This looks really ugly. How about telling the vdso subsystem directly?
Will, do you have a preference?

>  	} else {
>  		arch_timer_read_counter = arch_counter_get_cntvct_mem;
>  
> @@ -763,6 +870,9 @@ static void __init arch_timer_of_init(struct device_node *np)
>  
>  	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
>  
> +	if (of_property_read_bool(np, "fsl,erratum-a008585"))
> +		static_branch_enable(&arch_timer_read_ool_enabled);
> +
>  	/*
>  	 * If we cannot rely on firmware initializing the timer registers then
>  	 * we should use the physical timers instead.

An outstanding question is how we're going to deal with this in KVM,
because a guest absolutely needs to know about it (I can definitely see
time jumping in guests running on a LS2080).

Thanks,

	M.
Rob Herring May 16, 2016, 4:14 p.m. UTC | #2
On Thu, May 12, 2016 at 11:37:39PM -0500, Scott Wood wrote:
> Erratum A-008585 says that the ARM generic timer counter "has the
> potential to contain an erroneous value for a small number of core
> clock cycles every time the timer value changes".  Accesses to TVAL
> (both read and write) are also affected due to the implicit counter
> read.  Accesses to CVAL are not affected.
> 
> The workaround is to reread TVAL and count registers until successive reads
> return the same value, and when writing TVAL to retry until counter
> reads before and after the write return the same value.
> 
> This erratum can be found on LS1043A and LS2080A.
> 
> Signed-off-by: Scott Wood <oss@buserror.net>
> ---
> v2:
> Significant rework based on feedback, including using static_key,
> disabling VDSO counter access rather than adding the workaround to the
> VDSO, and uninlining the loops.
> 
> Dropped the separate property for indicating that writes to TVAL are
> affected, as I believe that's just a side effect of the implicit
> counter read being corrupted, and thus a chip that is affected by one
> will always be affected by the other.
> 
> Dropped the arm32 portion as it seems there was confusion about whether
> LS1021A is affected.  Currently I am being told that it is not
> affected.
> 
> I considered writing to CVAL rather than looping on TVAL writes, but
> that would still have required separate set_next_event() code for the
> erratum, and adding CVAL to the enum would have required a bunch of
> extra handlers in switch statements (even where unused, due to compiler
> warnings about unhandled enum values) including in an arm32 header.  It
> seemed better to avoid the arm32 interaction and new untested
> accessors.
> ---
>  .../devicetree/bindings/arm/arch_timer.txt         |   6 ++

Acked-by: Rob Herring <robh@kernel.org>

>  arch/arm64/include/asm/arch_timer.h                |  37 +++++--
>  drivers/clocksource/arm_arch_timer.c               | 110 +++++++++++++++++++++
>  3 files changed, 144 insertions(+), 9 deletions(-)
Crystal Wood June 22, 2016, 1:45 a.m. UTC | #3
On Fri, 2016-05-13 at 11:24 +0100, Marc Zyngier wrote:
> On Thu, 12 May 2016 23:37:39 -0500
> Scott Wood <oss@buserror.net> wrote:
> 
> Hi Scott,
> 
> > Erratum A-008585 says that the ARM generic timer counter "has the
> > potential to contain an erroneous value for a small number of core
> > clock cycles every time the timer value changes".  Accesses to TVAL
> > (both read and write) are also affected due to the implicit counter
> > read.  Accesses to CVAL are not affected.
> > 
> > The workaround is to reread TVAL and count registers until successive
> > reads
> > return the same value, and when writing TVAL to retry until counter
> > reads before and after the write return the same value.
> > 
> > This erratum can be found on LS1043A and LS2080A.
> > 
> > Signed-off-by: Scott Wood <oss@buserror.net>
> > ---
> > v2:
> > Significant rework based on feedback, including using static_key,
> > disabling VDSO counter access rather than adding the workaround to the
> > VDSO, and uninlining the loops.
> > 
> > Dropped the separate property for indicating that writes to TVAL are
> > affected, as I believe that's just a side effect of the implicit
> > counter read being corrupted, and thus a chip that is affected by one
> > will always be affected by the other.
> > 
> > Dropped the arm32 portion as it seems there was confusion about whether
> > LS1021A is affected.  Currently I am being told that it is not
> > affected.
> > 
> > I considered writing to CVAL rather than looping on TVAL writes, but
> > that would still have required separate set_next_event() code for the
> > erratum, and adding CVAL to the enum would have required a bunch of
> > extra handlers in switch statements (even where unused, due to compiler
> > warnings about unhandled enum values) including in an arm32 header.  It
> > seemed better to avoid the arm32 interaction and new untested
> > accessors.
> > ---
> >  .../devicetree/bindings/arm/arch_timer.txt         |   6 ++
> >  arch/arm64/include/asm/arch_timer.h                |  37 +++++--
> >  drivers/clocksource/arm_arch_timer.c               | 110
> > +++++++++++++++++++++
> >  3 files changed, 144 insertions(+), 9 deletions(-)
> > 
> > diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt
> > b/Documentation/devicetree/bindings/arm/arch_timer.txt
> > index e774128..ef5fbe9 100644
> > --- a/Documentation/devicetree/bindings/arm/arch_timer.txt
> > +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
> > @@ -25,6 +25,12 @@ to deliver its interrupts via SPIs.
> >  - always-on : a boolean property. If present, the timer is powered
> > through an
> >    always-on power domain, therefore it never loses context.
> >  
> > +- fsl,erratum-a008585 : A boolean property. Indicates the presence of
> > +  QorIQ erratum A-008585, which says that reading the counter is
> > +  unreliable unless the same value is returned by back-to-back reads.
> > +  This also affects writes to the tval register, due to the implicit
> > +  counter read.
> > +
> >  ** Optional properties:
> >  
> >  - arm,cpu-registers-not-fw-configured : Firmware does not initialize
> 
> This should be part of a separate patch. Also, errata should be
> documented in Documentation/arm64/silicon-errata.txt.

Oh right, forgot.

> > +extern struct static_key_false arch_timer_read_ool_enabled;
> > +
> > +#define ARCH_TIMER_REG_READ(reg, func) \
> > +extern u64 func##_ool(void); \
> > +static inline u64 __##func(void) \
> > +{ \
> > +	u64 val; \
> > +	asm volatile("mrs %0, " reg : "=r" (val)); \
> > +	return val; \
> > +} \
> > +static inline u64 _##func(void) \
> > +{ \
> > +	if (static_branch_unlikely(&arch_timer_read_ool_enabled)) \
> > +		return func##_ool(); \
> > +	else \
> > +		return __##func(); \
> > +}
> > +
> > +ARCH_TIMER_REG_READ("cntp_tval_el0", arch_timer_get_ptval)
> > +ARCH_TIMER_REG_READ("cntv_tval_el0", arch_timer_get_vtval)
> > +ARCH_TIMER_REG_READ("cntvct_el0", arch_counter_get_cntvct)
> > +
> 
> Given that this will have a (small) impact on non-affected platforms,
> it'd be good to have this guarded by a erratum-specific config option
> (CONFIG_FSL_ERRATUM_008585?) and turn it into trivial accessors when not
> defined.

OK.

> 
> >  /*
> >   * These register accessors are marked inline so the compiler can
> >   * nicely work out which register we want, and chuck away the rest of
> > @@ -66,19 +89,19 @@ u32 arch_timer_reg_read_cp15(int access, enum
> > arch_timer_reg reg)
> >  	if (access == ARCH_TIMER_PHYS_ACCESS) {
> >  		switch (reg) {
> >  		case ARCH_TIMER_REG_CTRL:
> > -			asm volatile("mrs %0,  cntp_ctl_el0" : "=r"
> > (val));
> > +			asm volatile("mrs %0, cntp_ctl_el0" : "=r"
> > (val));
> 
> Spurious change?

The extra spacing seemed to be an attempt to get things to line up between the
CTRL and TVAL asm statements.  When the TVAL case was converted to a function
call, there was nothing for the above to line up with, so I moved it back to
normal spacing.

> > +{
> > +	u64 cval_old, cval_new;
> > +	int timeout = 200;
> 
> Can we have a comment on how this value has been chosen? 

It's an arbitrary value well beyond the point at which we've seen it fail.

> > @@ -232,6 +274,50 @@ static __always_inline void set_next_event(const int
> > access, unsigned long evt,
> >  	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
> >  }
> >  
> > +#ifdef CONFIG_ARM64
> > +static __always_inline void rewrite_tval(const int access,
> > +		unsigned long evt, struct clock_event_device *clk)
> > +{
> > +	u64 cval_old, cval_new;
> > +	int timeout = 200;
> > +
> > +	do {
> > +		cval_old = __arch_counter_get_cntvct();
> > +		arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt,
> > clk);
> > +		cval_new = __arch_counter_get_cntvct();
> 
> Don't you need to guarantee the order of accesses here?

I'm not 100% sure.  The erratum workaround sample code doesn't show any
barriers, and adding more barriers could make it harder for the loop to
successfully complete.  There's already a barrier after the write, so the only
concern should be whether the timer read could be reordered after the timer
write, which could cause the loop to exit even if the write was bad.  Do you
know if A53 or A57 will reorder a counter read relative to a tval write?

> > +#endif /* ARM64 */
> > +
> >  static int arch_timer_set_next_event_virt(unsigned long evt,
> >  					  struct clock_event_device *clk)
> >  {
> > @@ -277,6 +363,13 @@ static void __arch_timer_setup(unsigned type,
> >  			clk->set_state_shutdown =
> > arch_timer_shutdown_virt;
> >  			clk->set_state_oneshot_stopped =
> > arch_timer_shutdown_virt;
> >  			clk->set_next_event =
> > arch_timer_set_next_event_virt;
> > +
> > +#ifdef CONFIG_ARM64
> > +			if
> > (static_branch_unlikely(&arch_timer_read_ool_enabled))
> > +				clk->set_next_event =
> > +					arch_timer_set_next_event_virt_er
> > rata;
> 
> On the same line, please.

I was trying to avoid going beyond 80 columns.

> > @@ -485,6 +585,13 @@ static void __init arch_counter_register(unsigned
> > type)
> >  			arch_timer_read_counter =
> > arch_counter_get_cntvct;
> >  		else
> >  			arch_timer_read_counter =
> > arch_counter_get_cntpct;
> > +
> > +		/*
> > +		 * Don't use the vdso fastpath if errata require using
> > +		 * the out-of-line counter accessor.
> > +		 */
> > +		if (static_branch_unlikely(&arch_timer_read_ool_enabled))
> > +			clocksource_counter.name =
> > "arch_sys_counter_ool";
> 
> This looks really ugly. How about telling the vdso subsystem directly?
> Will, do you have a preference?

I was following the example set by "arch_mem_counter".

> >  	} else {
> >  		arch_timer_read_counter = arch_counter_get_cntvct_mem;
> >  
> > @@ -763,6 +870,9 @@ static void __init arch_timer_of_init(struct
> > device_node *np)
> >  
> >  	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
> >  
> > +	if (of_property_read_bool(np, "fsl,erratum-a008585"))
> > +		static_branch_enable(&arch_timer_read_ool_enabled);
> > +
> >  	/*
> >  	 * If we cannot rely on firmware initializing the timer registers
> > then
> >  	 * we should use the physical timers instead.
> 
> An outstanding question is how we're going to deal with this in KVM,
> because a guest absolutely needs to know about it (I can definitely see
> time jumping in guests running on a LS2080).

The property will need to be in the guest's device tree.  I'm not too familiar
with how KVM handles device trees on arm...  From looking at the QEMU source
it seems that the dtb is passed in by the user.  So either that dtb will need
the erratum property in it, or QEMU (and KVM tool?) would need to patch it
 into the guest dtb based on seeing the property in /proc/device-tree.

-Scott
Ding Tianhong June 25, 2016, 7:16 a.m. UTC | #4
Hi Scott:

I have read this patch, looks fine to me, and why not add unlikely to cval_old != cval_new? I think this problem is hard to occur.

Thanks.
Ding





On 2016/6/22 9:45, Scott Wood wrote:
> On Fri, 2016-05-13 at 11:24 +0100, Marc Zyngier wrote:
>> On Thu, 12 May 2016 23:37:39 -0500
>> Scott Wood <oss@buserror.net> wrote:
>>
>> Hi Scott,
>>
>>> Erratum A-008585 says that the ARM generic timer counter "has the
>>> potential to contain an erroneous value for a small number of core
>>> clock cycles every time the timer value changes".  Accesses to TVAL
>>> (both read and write) are also affected due to the implicit counter
>>> read.  Accesses to CVAL are not affected.
>>>
>>> The workaround is to reread TVAL and count registers until successive
>>> reads
>>> return the same value, and when writing TVAL to retry until counter
>>> reads before and after the write return the same value.
>>>
>>> This erratum can be found on LS1043A and LS2080A.
>>>
>>> Signed-off-by: Scott Wood <oss@buserror.net>
>>> ---
>>> v2:
>>> Significant rework based on feedback, including using static_key,
>>> disabling VDSO counter access rather than adding the workaround to the
>>> VDSO, and uninlining the loops.
>>>
>>> Dropped the separate property for indicating that writes to TVAL are
>>> affected, as I believe that's just a side effect of the implicit
>>> counter read being corrupted, and thus a chip that is affected by one
>>> will always be affected by the other.
>>>
>>> Dropped the arm32 portion as it seems there was confusion about whether
>>> LS1021A is affected.  Currently I am being told that it is not
>>> affected.
>>>
>>> I considered writing to CVAL rather than looping on TVAL writes, but
>>> that would still have required separate set_next_event() code for the
>>> erratum, and adding CVAL to the enum would have required a bunch of
>>> extra handlers in switch statements (even where unused, due to compiler
>>> warnings about unhandled enum values) including in an arm32 header.  It
>>> seemed better to avoid the arm32 interaction and new untested
>>> accessors.
>>> ---
>>>  .../devicetree/bindings/arm/arch_timer.txt         |   6 ++
>>>  arch/arm64/include/asm/arch_timer.h                |  37 +++++--
>>>  drivers/clocksource/arm_arch_timer.c               | 110
>>> +++++++++++++++++++++
>>>  3 files changed, 144 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt
>>> b/Documentation/devicetree/bindings/arm/arch_timer.txt
>>> index e774128..ef5fbe9 100644
>>> --- a/Documentation/devicetree/bindings/arm/arch_timer.txt
>>> +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
>>> @@ -25,6 +25,12 @@ to deliver its interrupts via SPIs.
>>>  - always-on : a boolean property. If present, the timer is powered
>>> through an
>>>    always-on power domain, therefore it never loses context.
>>>  
>>> +- fsl,erratum-a008585 : A boolean property. Indicates the presence of
>>> +  QorIQ erratum A-008585, which says that reading the counter is
>>> +  unreliable unless the same value is returned by back-to-back reads.
>>> +  This also affects writes to the tval register, due to the implicit
>>> +  counter read.
>>> +
>>>  ** Optional properties:
>>>  
>>>  - arm,cpu-registers-not-fw-configured : Firmware does not initialize
>>
>> This should be part of a separate patch. Also, errata should be
>> documented in Documentation/arm64/silicon-errata.txt.
> 
> Oh right, forgot.
> 
>>> +extern struct static_key_false arch_timer_read_ool_enabled;
>>> +
>>> +#define ARCH_TIMER_REG_READ(reg, func) \
>>> +extern u64 func##_ool(void); \
>>> +static inline u64 __##func(void) \
>>> +{ \
>>> +	u64 val; \
>>> +	asm volatile("mrs %0, " reg : "=r" (val)); \
>>> +	return val; \
>>> +} \
>>> +static inline u64 _##func(void) \
>>> +{ \
>>> +	if (static_branch_unlikely(&arch_timer_read_ool_enabled)) \
>>> +		return func##_ool(); \
>>> +	else \
>>> +		return __##func(); \
>>> +}
>>> +
>>> +ARCH_TIMER_REG_READ("cntp_tval_el0", arch_timer_get_ptval)
>>> +ARCH_TIMER_REG_READ("cntv_tval_el0", arch_timer_get_vtval)
>>> +ARCH_TIMER_REG_READ("cntvct_el0", arch_counter_get_cntvct)
>>> +
>>
>> Given that this will have a (small) impact on non-affected platforms,
>> it'd be good to have this guarded by a erratum-specific config option
>> (CONFIG_FSL_ERRATUM_008585?) and turn it into trivial accessors when not
>> defined.
> 
> OK.
> 
>>
>>>  /*
>>>   * These register accessors are marked inline so the compiler can
>>>   * nicely work out which register we want, and chuck away the rest of
>>> @@ -66,19 +89,19 @@ u32 arch_timer_reg_read_cp15(int access, enum
>>> arch_timer_reg reg)
>>>  	if (access == ARCH_TIMER_PHYS_ACCESS) {
>>>  		switch (reg) {
>>>  		case ARCH_TIMER_REG_CTRL:
>>> -			asm volatile("mrs %0,  cntp_ctl_el0" : "=r"
>>> (val));
>>> +			asm volatile("mrs %0, cntp_ctl_el0" : "=r"
>>> (val));
>>
>> Spurious change?
> 
> The extra spacing seemed to be an attempt to get things to line up between the
> CTRL and TVAL asm statements.  When the TVAL case was converted to a function
> call, there was nothing for the above to line up with, so I moved it back to
> normal spacing.
> 
>>> +{
>>> +	u64 cval_old, cval_new;
>>> +	int timeout = 200;
>>
>> Can we have a comment on how this value has been chosen? 
> 
> It's an arbitrary value well beyond the point at which we've seen it fail.
> 
>>> @@ -232,6 +274,50 @@ static __always_inline void set_next_event(const int
>>> access, unsigned long evt,
>>>  	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
>>>  }
>>>  
>>> +#ifdef CONFIG_ARM64
>>> +static __always_inline void rewrite_tval(const int access,
>>> +		unsigned long evt, struct clock_event_device *clk)
>>> +{
>>> +	u64 cval_old, cval_new;
>>> +	int timeout = 200;
>>> +
>>> +	do {
>>> +		cval_old = __arch_counter_get_cntvct();
>>> +		arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt,
>>> clk);
>>> +		cval_new = __arch_counter_get_cntvct();
>>
>> Don't you need to guarantee the order of accesses here?
> 
> I'm not 100% sure.  The erratum workaround sample code doesn't show any
> barriers, and adding more barriers could make it harder for the loop to
> successfully complete.  There's already a barrier after the write, so the only
> concern should be whether the timer read could be reordered after the timer
> write, which could cause the loop to exit even if the write was bad.  Do you
> know if A53 or A57 will reorder a counter read relative to a tval write?
> 
>>> +#endif /* ARM64 */
>>> +
>>>  static int arch_timer_set_next_event_virt(unsigned long evt,
>>>  					  struct clock_event_device *clk)
>>>  {
>>> @@ -277,6 +363,13 @@ static void __arch_timer_setup(unsigned type,
>>>  			clk->set_state_shutdown =
>>> arch_timer_shutdown_virt;
>>>  			clk->set_state_oneshot_stopped =
>>> arch_timer_shutdown_virt;
>>>  			clk->set_next_event =
>>> arch_timer_set_next_event_virt;
>>> +
>>> +#ifdef CONFIG_ARM64
>>> +			if
>>> (static_branch_unlikely(&arch_timer_read_ool_enabled))
>>> +				clk->set_next_event =
>>> +					arch_timer_set_next_event_virt_er
>>> rata;
>>
>> On the same line, please.
> 
> I was trying to avoid going beyond 80 columns.
> 
>>> @@ -485,6 +585,13 @@ static void __init arch_counter_register(unsigned
>>> type)
>>>  			arch_timer_read_counter =
>>> arch_counter_get_cntvct;
>>>  		else
>>>  			arch_timer_read_counter =
>>> arch_counter_get_cntpct;
>>> +
>>> +		/*
>>> +		 * Don't use the vdso fastpath if errata require using
>>> +		 * the out-of-line counter accessor.
>>> +		 */
>>> +		if (static_branch_unlikely(&arch_timer_read_ool_enabled))
>>> +			clocksource_counter.name =
>>> "arch_sys_counter_ool";
>>
>> This looks really ugly. How about telling the vdso subsystem directly?
>> Will, do you have a preference?
> 
> I was following the example set by "arch_mem_counter".
> 
>>>  	} else {
>>>  		arch_timer_read_counter = arch_counter_get_cntvct_mem;
>>>  
>>> @@ -763,6 +870,9 @@ static void __init arch_timer_of_init(struct
>>> device_node *np)
>>>  
>>>  	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
>>>  
>>> +	if (of_property_read_bool(np, "fsl,erratum-a008585"))
>>> +		static_branch_enable(&arch_timer_read_ool_enabled);
>>> +
>>>  	/*
>>>  	 * If we cannot rely on firmware initializing the timer registers
>>> then
>>>  	 * we should use the physical timers instead.
>>
>> An outstanding question is how we're going to deal with this in KVM,
>> because a guest absolutely needs to know about it (I can definitely see
>> time jumping in guests running on a LS2080).
> 
> The property will need to be in the guest's device tree.  I'm not too familiar
> with how KVM handles device trees on arm...  From looking at the QEMU source
> it seems that the dtb is passed in by the user.  So either that dtb will need
> the erratum property in it, or QEMU (and KVM tool?) would need to patch it
>  into the guest dtb based on seeing the property in /proc/device-tree.
> 
> -Scott
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 
> .
>
Marc Zyngier June 27, 2016, 1:13 p.m. UTC | #5
On 22/06/16 02:45, Scott Wood wrote:
> On Fri, 2016-05-13 at 11:24 +0100, Marc Zyngier wrote:
>> On Thu, 12 May 2016 23:37:39 -0500
>> Scott Wood <oss@buserror.net> wrote:
>>
>> Hi Scott,
>>
>>> Erratum A-008585 says that the ARM generic timer counter "has the
>>> potential to contain an erroneous value for a small number of core
>>> clock cycles every time the timer value changes".  Accesses to TVAL
>>> (both read and write) are also affected due to the implicit counter
>>> read.  Accesses to CVAL are not affected.
>>>
>>> The workaround is to reread TVAL and count registers until successive
>>> reads
>>> return the same value, and when writing TVAL to retry until counter
>>> reads before and after the write return the same value.
>>>
>>> This erratum can be found on LS1043A and LS2080A.
>>>
>>> Signed-off-by: Scott Wood <oss@buserror.net>
>>> ---
>>> v2:
>>> Significant rework based on feedback, including using static_key,
>>> disabling VDSO counter access rather than adding the workaround to the
>>> VDSO, and uninlining the loops.
>>>
>>> Dropped the separate property for indicating that writes to TVAL are
>>> affected, as I believe that's just a side effect of the implicit
>>> counter read being corrupted, and thus a chip that is affected by one
>>> will always be affected by the other.
>>>
>>> Dropped the arm32 portion as it seems there was confusion about whether
>>> LS1021A is affected.  Currently I am being told that it is not
>>> affected.
>>>
>>> I considered writing to CVAL rather than looping on TVAL writes, but
>>> that would still have required separate set_next_event() code for the
>>> erratum, and adding CVAL to the enum would have required a bunch of
>>> extra handlers in switch statements (even where unused, due to compiler
>>> warnings about unhandled enum values) including in an arm32 header.  It
>>> seemed better to avoid the arm32 interaction and new untested
>>> accessors.
>>> ---
>>>  .../devicetree/bindings/arm/arch_timer.txt         |   6 ++
>>>  arch/arm64/include/asm/arch_timer.h                |  37 +++++--
>>>  drivers/clocksource/arm_arch_timer.c               | 110
>>> +++++++++++++++++++++
>>>  3 files changed, 144 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt
>>> b/Documentation/devicetree/bindings/arm/arch_timer.txt
>>> index e774128..ef5fbe9 100644
>>> --- a/Documentation/devicetree/bindings/arm/arch_timer.txt
>>> +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
>>> @@ -25,6 +25,12 @@ to deliver its interrupts via SPIs.
>>>  - always-on : a boolean property. If present, the timer is powered
>>> through an
>>>    always-on power domain, therefore it never loses context.
>>>  
>>> +- fsl,erratum-a008585 : A boolean property. Indicates the presence of
>>> +  QorIQ erratum A-008585, which says that reading the counter is
>>> +  unreliable unless the same value is returned by back-to-back reads.
>>> +  This also affects writes to the tval register, due to the implicit
>>> +  counter read.
>>> +
>>>  ** Optional properties:
>>>  
>>>  - arm,cpu-registers-not-fw-configured : Firmware does not initialize
>>
>> This should be part of a separate patch. Also, errata should be
>> documented in Documentation/arm64/silicon-errata.txt.
> 
> Oh right, forgot.
> 
>>> +extern struct static_key_false arch_timer_read_ool_enabled;
>>> +
>>> +#define ARCH_TIMER_REG_READ(reg, func) \
>>> +extern u64 func##_ool(void); \
>>> +static inline u64 __##func(void) \
>>> +{ \
>>> +	u64 val; \
>>> +	asm volatile("mrs %0, " reg : "=r" (val)); \
>>> +	return val; \
>>> +} \
>>> +static inline u64 _##func(void) \
>>> +{ \
>>> +	if (static_branch_unlikely(&arch_timer_read_ool_enabled)) \
>>> +		return func##_ool(); \
>>> +	else \
>>> +		return __##func(); \
>>> +}
>>> +
>>> +ARCH_TIMER_REG_READ("cntp_tval_el0", arch_timer_get_ptval)
>>> +ARCH_TIMER_REG_READ("cntv_tval_el0", arch_timer_get_vtval)
>>> +ARCH_TIMER_REG_READ("cntvct_el0", arch_counter_get_cntvct)
>>> +
>>
>> Given that this will have a (small) impact on non-affected platforms,
>> it'd be good to have this guarded by a erratum-specific config option
>> (CONFIG_FSL_ERRATUM_008585?) and turn it into trivial accessors when not
>> defined.
> 
> OK.
> 
>>
>>>  /*
>>>   * These register accessors are marked inline so the compiler can
>>>   * nicely work out which register we want, and chuck away the rest of
>>> @@ -66,19 +89,19 @@ u32 arch_timer_reg_read_cp15(int access, enum
>>> arch_timer_reg reg)
>>>  	if (access == ARCH_TIMER_PHYS_ACCESS) {
>>>  		switch (reg) {
>>>  		case ARCH_TIMER_REG_CTRL:
>>> -			asm volatile("mrs %0,  cntp_ctl_el0" : "=r"
>>> (val));
>>> +			asm volatile("mrs %0, cntp_ctl_el0" : "=r"
>>> (val));
>>
>> Spurious change?
> 
> The extra spacing seemed to be an attempt to get things to line up between the
> CTRL and TVAL asm statements.  When the TVAL case was converted to a function
> call, there was nothing for the above to line up with, so I moved it back to
> normal spacing.
> 
>>> +{
>>> +	u64 cval_old, cval_new;
>>> +	int timeout = 200;
>>
>> Can we have a comment on how this value has been chosen? 
> 
> It's an arbitrary value well beyond the point at which we've seen it fail.

So can we please have a comment *in the code* that explains how this
value has been picked?

> 
>>> @@ -232,6 +274,50 @@ static __always_inline void set_next_event(const int
>>> access, unsigned long evt,
>>>  	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
>>>  }
>>>  
>>> +#ifdef CONFIG_ARM64
>>> +static __always_inline void rewrite_tval(const int access,
>>> +		unsigned long evt, struct clock_event_device *clk)
>>> +{
>>> +	u64 cval_old, cval_new;
>>> +	int timeout = 200;
>>> +
>>> +	do {
>>> +		cval_old = __arch_counter_get_cntvct();
>>> +		arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt,
>>> clk);
>>> +		cval_new = __arch_counter_get_cntvct();
>>
>> Don't you need to guarantee the order of accesses here?
> 
> I'm not 100% sure.  The erratum workaround sample code doesn't show any
> barriers, and adding more barriers could make it harder for the loop to
> successfully complete.  There's already a barrier after the write, so the only
> concern should be whether the timer read could be reordered after the timer
> write, which could cause the loop to exit even if the write was bad.  Do you
> know if A53 or A57 will reorder a counter read relative to a tval write?

I can't see any absolute guarantee that they wouldn't be reordered (but
I have no insight on the micro-architecture either). I'd rather err on
the side of caution here.

> 
>>> +#endif /* ARM64 */
>>> +
>>>  static int arch_timer_set_next_event_virt(unsigned long evt,
>>>  					  struct clock_event_device *clk)
>>>  {
>>> @@ -277,6 +363,13 @@ static void __arch_timer_setup(unsigned type,
>>>  			clk->set_state_shutdown =
>>> arch_timer_shutdown_virt;
>>>  			clk->set_state_oneshot_stopped =
>>> arch_timer_shutdown_virt;
>>>  			clk->set_next_event =
>>> arch_timer_set_next_event_virt;
>>> +
>>> +#ifdef CONFIG_ARM64
>>> +			if
>>> (static_branch_unlikely(&arch_timer_read_ool_enabled))
>>> +				clk->set_next_event =
>>> +					arch_timer_set_next_event_virt_er
>>> rata;
>>
>> On the same line, please.
> 
> I was trying to avoid going beyond 80 columns.

Please ignore what checkpatch says. Readability is more important (and
I've given up using a vintage vt100...).

> 
>>> @@ -485,6 +585,13 @@ static void __init arch_counter_register(unsigned
>>> type)
>>>  			arch_timer_read_counter =
>>> arch_counter_get_cntvct;
>>>  		else
>>>  			arch_timer_read_counter =
>>> arch_counter_get_cntpct;
>>> +
>>> +		/*
>>> +		 * Don't use the vdso fastpath if errata require using
>>> +		 * the out-of-line counter accessor.
>>> +		 */
>>> +		if (static_branch_unlikely(&arch_timer_read_ool_enabled))
>>> +			clocksource_counter.name =
>>> "arch_sys_counter_ool";
>>
>> This looks really ugly. How about telling the vdso subsystem directly?
>> Will, do you have a preference?
> 
> I was following the example set by "arch_mem_counter".

The right thing should probably be to use a flag in the clocksource
structure to mark it as "suitable for vdso", but I guess we can fix that
in a subsequent patch.

> 
>>>  	} else {
>>>  		arch_timer_read_counter = arch_counter_get_cntvct_mem;
>>>  
>>> @@ -763,6 +870,9 @@ static void __init arch_timer_of_init(struct
>>> device_node *np)
>>>  
>>>  	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
>>>  
>>> +	if (of_property_read_bool(np, "fsl,erratum-a008585"))
>>> +		static_branch_enable(&arch_timer_read_ool_enabled);
>>> +
>>>  	/*
>>>  	 * If we cannot rely on firmware initializing the timer registers
>>> then
>>>  	 * we should use the physical timers instead.
>>
>> An outstanding question is how we're going to deal with this in KVM,
>> because a guest absolutely needs to know about it (I can definitely see
>> time jumping in guests running on a LS2080).
> 
> The property will need to be in the guest's device tree.  I'm not too familiar
> with how KVM handles device trees on arm...  From looking at the QEMU source
> it seems that the dtb is passed in by the user.  So either that dtb will need
> the erratum property in it, or QEMU (and KVM tool?) would need to patch it
>  into the guest dtb based on seeing the property in /proc/device-tree.

There is no guarantee that the host device tree is always accessible to
userspace. So we're probably looking at requiring a new KVM device API
that would expose the timer properties, one of them being this erratum.
That's certainly going to be fun to handle.

Thanks,

	M.
Crystal Wood June 29, 2016, 2:05 a.m. UTC | #6
On Mon, 2016-06-27 at 14:13 +0100, Marc Zyngier wrote:
> On 22/06/16 02:45, Scott Wood wrote:
> > 
> > On Fri, 2016-05-13 at 11:24 +0100, Marc Zyngier wrote:
> > > 
> > > On Thu, 12 May 2016 23:37:39 -0500
> > > Scott Wood <oss@buserror.net> wrote:
> > > 
> > > > +{
> > > > +	u64 cval_old, cval_new;
> > > > +	int timeout = 200;
> > > Can we have a comment on how this value has been chosen? 
> > It's an arbitrary value well beyond the point at which we've seen it fail.
> So can we please have a comment *in the code* that explains how this
> value has been picked?

Sure, if you want.  I just wasn't sure there was much value in a comment that
essentially says that there's no special meaning to this particular value.

> > > > int
> > > > access, unsigned long evt,
> > > >  	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
> > > >  }
> > > >  
> > > > +#ifdef CONFIG_ARM64
> > > > +static __always_inline void rewrite_tval(const int access,
> > > > +		unsigned long evt, struct clock_event_device *clk)
> > > > +{
> > > > +	u64 cval_old, cval_new;
> > > > +	int timeout = 200;
> > > > +
> > > > +	do {
> > > > +		cval_old = __arch_counter_get_cntvct();
> > > > +		arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL,
> > > > evt,
> > > > clk);
> > > > +		cval_new = __arch_counter_get_cntvct();
> > > Don't you need to guarantee the order of accesses here?
> > I'm not 100% sure.  The erratum workaround sample code doesn't show any
> > barriers, and adding more barriers could make it harder for the loop to
> > successfully complete.  There's already a barrier after the write, so the
> > only
> > concern should be whether the timer read could be reordered after the
> > timer
> > write, which could cause the loop to exit even if the write was bad.  Do
> > you
> > know if A53 or A57 will reorder a counter read relative to a tval write?
> I can't see any absolute guarantee that they wouldn't be reordered (but
> I have no insight on the micro-architecture either). I'd rather err on
> the side of caution here.

OK, I'll see how well it works with the added barrier.

> > > >  			clk->set_state_shutdown =
> > > > arch_timer_shutdown_virt;
> > > >  			clk->set_state_oneshot_stopped =
> > > > arch_timer_shutdown_virt;
> > > >  			clk->set_next_event =
> > > > arch_timer_set_next_event_virt;
> > > > +
> > > > +#ifdef CONFIG_ARM64
> > > > +			if
> > > > (static_branch_unlikely(&arch_timer_read_ool_enabled))
> > > > +				clk->set_next_event =
> > > > +					arch_timer_set_next_event_vir
> > > > t_er
> > > > rata;
> > > On the same line, please.
> > I was trying to avoid going beyond 80 columns.
> Please ignore what checkpatch says. Readability is more important (and
> I've given up using a vintage vt100...).

I'm not using a vintage vt100 either but I still have (approximately) 80-
column terminals, because I like having two terminals side-by-side with a
reasonable font size... and since different people have different terminal
setups, CodingStyle specifies a standard limit.

I think I can make it moot with the suggestion to have a helper function,
though.

> > >  	} else {
> > > >  		arch_timer_read_counter =
> > > > arch_counter_get_cntvct_mem;
> > > >  
> > > > @@ -763,6 +870,9 @@ static void __init arch_timer_of_init(struct
> > > > device_node *np)
> > > >  
> > > >  	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
> > > >  
> > > > +	if (of_property_read_bool(np, "fsl,erratum-a008585"))
> > > > +		static_branch_enable(&arch_timer_read_ool_enabled);
> > > > +
> > > >  	/*
> > > >  	 * If we cannot rely on firmware initializing the timer
> > > > registers
> > > > then
> > > >  	 * we should use the physical timers instead.
> > > An outstanding question is how we're going to deal with this in KVM,
> > > because a guest absolutely needs to know about it (I can definitely see
> > > time jumping in guests running on a LS2080).
> > The property will need to be in the guest's device tree.  I'm not too
> > familiar
> > with how KVM handles device trees on arm...  From looking at the QEMU
> > source
> > it seems that the dtb is passed in by the user.  So either that dtb will
> > need
> > the erratum property in it, or QEMU (and KVM tool?) would need to patch it
> >  into the guest dtb based on seeing the property in /proc/device-tree.
> There is no guarantee that the host device tree is always accessible to
> userspace. So we're probably looking at requiring a new KVM device API
> that would expose the timer properties, one of them being this erratum.
> That's certainly going to be fun to handle.

KVM on PPC relies on /proc/device-tree -- when would it not be available?
 Where is the dtb passed to QEMU expected to come from?

In any case, let's get the kernel sorted out first.

-Scott
Hanjun Guo June 29, 2016, 7:56 a.m. UTC | #7
Hello Scott,

On 2016/5/13 12:37, Scott Wood wrote:
[...]
>  
> +#ifdef CONFIG_ARM64
> +static __always_inline void rewrite_tval(const int access,
> +		unsigned long evt, struct clock_event_device *clk)
> +{
> +	u64 cval_old, cval_new;
> +	int timeout = 200;
> +
> +	do {
> +		cval_old = __arch_counter_get_cntvct();
> +		arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt, clk);

For not memory mapped timer, it will call arch_timer_reg_write_cp15() which has
isb() at the end of arch_timer_reg_write_cp15()...

> +		cval_new = __arch_counter_get_cntvct();

So there is isb() between counter retry read, I think it's likely cval_new will
not be equal with cval_old when the cntvct is correct (time lapse is more than
one arch timer cycle).

> +		timeout--;
> +	} while (cval_old != cval_new && timeout);
> +
> +	WARN_ON_ONCE(!timeout);

Then trigger the warning here, correct me if I miss something else.

Thanks
Hanjun
Hanjun Guo June 29, 2016, 8:11 a.m. UTC | #8
On 2016/5/13 18:24, Marc Zyngier wrote:
> On Thu, 12 May 2016 23:37:39 -0500
> Scott Wood <oss@buserror.net> wrote:
>
> Hi Scott,
>
>> Erratum A-008585 says that the ARM generic timer counter "has the
>> potential to contain an erroneous value for a small number of core
>> clock cycles every time the timer value changes".  Accesses to TVAL
>> (both read and write) are also affected due to the implicit counter
>> read.  Accesses to CVAL are not affected.
>>
>> The workaround is to reread TVAL and count registers until successive reads
>> return the same value, and when writing TVAL to retry until counter
>> reads before and after the write return the same value.
>>
>> This erratum can be found on LS1043A and LS2080A.
>>
>> Signed-off-by: Scott Wood <oss@buserror.net>
>> ---
>> v2:
>> Significant rework based on feedback, including using static_key,
>> disabling VDSO counter access rather than adding the workaround to the
>> VDSO, and uninlining the loops.
>>
>> Dropped the separate property for indicating that writes to TVAL are
>> affected, as I believe that's just a side effect of the implicit
>> counter read being corrupted, and thus a chip that is affected by one
>> will always be affected by the other.
>>
>> Dropped the arm32 portion as it seems there was confusion about whether
>> LS1021A is affected.  Currently I am being told that it is not
>> affected.
>>
>> I considered writing to CVAL rather than looping on TVAL writes, but
>> that would still have required separate set_next_event() code for the
>> erratum, and adding CVAL to the enum would have required a bunch of
>> extra handlers in switch statements (even where unused, due to compiler
>> warnings about unhandled enum values) including in an arm32 header.  It
>> seemed better to avoid the arm32 interaction and new untested
>> accessors.
>> ---
>>  .../devicetree/bindings/arm/arch_timer.txt         |   6 ++
>>  arch/arm64/include/asm/arch_timer.h                |  37 +++++--
>>  drivers/clocksource/arm_arch_timer.c               | 110 +++++++++++++++++++++
>>  3 files changed, 144 insertions(+), 9 deletions(-)
>>
>> diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
>> index e774128..ef5fbe9 100644
>> --- a/Documentation/devicetree/bindings/arm/arch_timer.txt
>> +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
>> @@ -25,6 +25,12 @@ to deliver its interrupts via SPIs.
>>  - always-on : a boolean property. If present, the timer is powered through an
>>    always-on power domain, therefore it never loses context.
>>  
>> +- fsl,erratum-a008585 : A boolean property. Indicates the presence of
>> +  QorIQ erratum A-008585, which says that reading the counter is
>> +  unreliable unless the same value is returned by back-to-back reads.
>> +  This also affects writes to the tval register, due to the implicit
>> +  counter read.
>> +
>>  ** Optional properties:
>>  
>>  - arm,cpu-registers-not-fw-configured : Firmware does not initialize
> This should be part of a separate patch. Also, errata should be
> documented in Documentation/arm64/silicon-errata.txt.
>
>> diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
>> index fbe0ca3..9715e85 100644
>> --- a/arch/arm64/include/asm/arch_timer.h
>> +++ b/arch/arm64/include/asm/arch_timer.h
>> @@ -23,10 +23,33 @@
>>  
>>  #include <linux/bug.h>
>>  #include <linux/init.h>
>> +#include <linux/jump_label.h>
>>  #include <linux/types.h>
>>  
>>  #include <clocksource/arm_arch_timer.h>
>>  
>> +extern struct static_key_false arch_timer_read_ool_enabled;
>> +
>> +#define ARCH_TIMER_REG_READ(reg, func) \
>> +extern u64 func##_ool(void); \
>> +static inline u64 __##func(void) \
>> +{ \
>> +	u64 val; \
>> +	asm volatile("mrs %0, " reg : "=r" (val)); \
>> +	return val; \
>> +} \
>> +static inline u64 _##func(void) \
>> +{ \
>> +	if (static_branch_unlikely(&arch_timer_read_ool_enabled)) \
>> +		return func##_ool(); \
>> +	else \
>> +		return __##func(); \
>> +}
>> +
>> +ARCH_TIMER_REG_READ("cntp_tval_el0", arch_timer_get_ptval)
>> +ARCH_TIMER_REG_READ("cntv_tval_el0", arch_timer_get_vtval)
>> +ARCH_TIMER_REG_READ("cntvct_el0", arch_counter_get_cntvct)
>> +
> Given that this will have a (small) impact on non-affected platforms,
> it'd be good to have this guarded by a erratum-specific config option
> (CONFIG_FSL_ERRATUM_008585?) and turn it into trivial accessors when not
> defined.
>
>>  /*
>>   * These register accessors are marked inline so the compiler can
>>   * nicely work out which register we want, and chuck away the rest of
>> @@ -66,19 +89,19 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
>>  	if (access == ARCH_TIMER_PHYS_ACCESS) {
>>  		switch (reg) {
>>  		case ARCH_TIMER_REG_CTRL:
>> -			asm volatile("mrs %0,  cntp_ctl_el0" : "=r" (val));
>> +			asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val));
> Spurious change?
>
>>  			break;
>>  		case ARCH_TIMER_REG_TVAL:
>> -			asm volatile("mrs %0, cntp_tval_el0" : "=r" (val));
>> +			val = _arch_timer_get_ptval();
>>  			break;
>>  		}
>>  	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
>>  		switch (reg) {
>>  		case ARCH_TIMER_REG_CTRL:
>> -			asm volatile("mrs %0,  cntv_ctl_el0" : "=r" (val));
>> +			asm volatile("mrs %0, cntv_ctl_el0" : "=r" (val));
> Same here.
>
>>  			break;
>>  		case ARCH_TIMER_REG_TVAL:
>> -			asm volatile("mrs %0, cntv_tval_el0" : "=r" (val));
>> +			val = _arch_timer_get_vtval();
>>  			break;
>>  		}
>>  	}
>> @@ -116,12 +139,8 @@ static inline u64 arch_counter_get_cntpct(void)
>>  
>>  static inline u64 arch_counter_get_cntvct(void)
>>  {
>> -	u64 cval;
>> -
>>  	isb();
>> -	asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
>> -
>> -	return cval;
>> +	return _arch_counter_get_cntvct();
>>  }
>>  
>>  static inline int arch_timer_arch_init(void)
>> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
>> index 5152b38..6f78831 100644
>> --- a/drivers/clocksource/arm_arch_timer.c
>> +++ b/drivers/clocksource/arm_arch_timer.c
>> @@ -79,10 +79,52 @@ static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI;
>>  static bool arch_timer_c3stop;
>>  static bool arch_timer_mem_use_virtual;
>>  
>> +DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled);
>> +EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled);
>> +
> Once you have a config option, this can move to the guarded section
> below...
>
>>  /*
>>   * Architected system timer support.
>>   */
>>  
>> +#ifdef CONFIG_ARM64
> which can become CONFIG_FSL_ERRATUM_008585
>
>> +/*
>> + * __always_inline is used to ensure that func() is not an actual function
>> + * pointer, which would result in the register accesses potentially being too
>> + * far apart for the loop to work.
>> + */
>> +static __always_inline u64 arch_timer_reread(u64 (*func)(void))
> This is quite a generic function name. I'd expect something that refers
> to the erratum number.

This is a generic solution to reread the timer counter, how about using a generic one
here for more potential consumers in the future?

Thanks
Hanjun
Marc Zyngier June 29, 2016, 8:19 a.m. UTC | #9
On 29/06/16 08:56, Hanjun Guo wrote:
> Hello Scott,
> 
> On 2016/5/13 12:37, Scott Wood wrote:
> [...]
>>  
>> +#ifdef CONFIG_ARM64
>> +static __always_inline void rewrite_tval(const int access,
>> +		unsigned long evt, struct clock_event_device *clk)
>> +{
>> +	u64 cval_old, cval_new;
>> +	int timeout = 200;
>> +
>> +	do {
>> +		cval_old = __arch_counter_get_cntvct();
>> +		arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt, clk);
> 
> For not memory mapped timer, it will call arch_timer_reg_write_cp15() which has
> isb() at the end of arch_timer_reg_write_cp15()...
> 
>> +		cval_new = __arch_counter_get_cntvct();
> 
> So there is isb() between counter retry read, I think it's likely cval_new will
> not be equal with cval_old when the cntvct is correct (time lapse is more than
> one arch timer cycle).

Are you saying that the isb will delay the execution of the read so much
that your timer will always change? Well, if your timer is in the GHz
range, maybe. But that also implies that it is out of spec (it should be
in the 1-50MHz range).

Thanks,

	M.
Marc Zyngier June 29, 2016, 8:24 a.m. UTC | #10
On 29/06/16 09:11, Hanjun Guo wrote:
> On 2016/5/13 18:24, Marc Zyngier wrote:
>> On Thu, 12 May 2016 23:37:39 -0500
>> Scott Wood <oss@buserror.net> wrote:
>>
>> Hi Scott,
>>
>>> Erratum A-008585 says that the ARM generic timer counter "has the
>>> potential to contain an erroneous value for a small number of core
>>> clock cycles every time the timer value changes".  Accesses to TVAL
>>> (both read and write) are also affected due to the implicit counter
>>> read.  Accesses to CVAL are not affected.
>>>
>>> The workaround is to reread TVAL and count registers until successive reads
>>> return the same value, and when writing TVAL to retry until counter
>>> reads before and after the write return the same value.
>>>
>>> This erratum can be found on LS1043A and LS2080A.
>>>
>>> Signed-off-by: Scott Wood <oss@buserror.net>
>>> ---
>>> v2:
>>> Significant rework based on feedback, including using static_key,
>>> disabling VDSO counter access rather than adding the workaround to the
>>> VDSO, and uninlining the loops.
>>>
>>> Dropped the separate property for indicating that writes to TVAL are
>>> affected, as I believe that's just a side effect of the implicit
>>> counter read being corrupted, and thus a chip that is affected by one
>>> will always be affected by the other.
>>>
>>> Dropped the arm32 portion as it seems there was confusion about whether
>>> LS1021A is affected.  Currently I am being told that it is not
>>> affected.
>>>
>>> I considered writing to CVAL rather than looping on TVAL writes, but
>>> that would still have required separate set_next_event() code for the
>>> erratum, and adding CVAL to the enum would have required a bunch of
>>> extra handlers in switch statements (even where unused, due to compiler
>>> warnings about unhandled enum values) including in an arm32 header.  It
>>> seemed better to avoid the arm32 interaction and new untested
>>> accessors.
>>> ---
>>>  .../devicetree/bindings/arm/arch_timer.txt         |   6 ++
>>>  arch/arm64/include/asm/arch_timer.h                |  37 +++++--
>>>  drivers/clocksource/arm_arch_timer.c               | 110 +++++++++++++++++++++
>>>  3 files changed, 144 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
>>> index e774128..ef5fbe9 100644
>>> --- a/Documentation/devicetree/bindings/arm/arch_timer.txt
>>> +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
>>> @@ -25,6 +25,12 @@ to deliver its interrupts via SPIs.
>>>  - always-on : a boolean property. If present, the timer is powered through an
>>>    always-on power domain, therefore it never loses context.
>>>  
>>> +- fsl,erratum-a008585 : A boolean property. Indicates the presence of
>>> +  QorIQ erratum A-008585, which says that reading the counter is
>>> +  unreliable unless the same value is returned by back-to-back reads.
>>> +  This also affects writes to the tval register, due to the implicit
>>> +  counter read.
>>> +
>>>  ** Optional properties:
>>>  
>>>  - arm,cpu-registers-not-fw-configured : Firmware does not initialize
>> This should be part of a separate patch. Also, errata should be
>> documented in Documentation/arm64/silicon-errata.txt.
>>
>>> diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
>>> index fbe0ca3..9715e85 100644
>>> --- a/arch/arm64/include/asm/arch_timer.h
>>> +++ b/arch/arm64/include/asm/arch_timer.h
>>> @@ -23,10 +23,33 @@
>>>  
>>>  #include <linux/bug.h>
>>>  #include <linux/init.h>
>>> +#include <linux/jump_label.h>
>>>  #include <linux/types.h>
>>>  
>>>  #include <clocksource/arm_arch_timer.h>
>>>  
>>> +extern struct static_key_false arch_timer_read_ool_enabled;
>>> +
>>> +#define ARCH_TIMER_REG_READ(reg, func) \
>>> +extern u64 func##_ool(void); \
>>> +static inline u64 __##func(void) \
>>> +{ \
>>> +	u64 val; \
>>> +	asm volatile("mrs %0, " reg : "=r" (val)); \
>>> +	return val; \
>>> +} \
>>> +static inline u64 _##func(void) \
>>> +{ \
>>> +	if (static_branch_unlikely(&arch_timer_read_ool_enabled)) \
>>> +		return func##_ool(); \
>>> +	else \
>>> +		return __##func(); \
>>> +}
>>> +
>>> +ARCH_TIMER_REG_READ("cntp_tval_el0", arch_timer_get_ptval)
>>> +ARCH_TIMER_REG_READ("cntv_tval_el0", arch_timer_get_vtval)
>>> +ARCH_TIMER_REG_READ("cntvct_el0", arch_counter_get_cntvct)
>>> +
>> Given that this will have a (small) impact on non-affected platforms,
>> it'd be good to have this guarded by a erratum-specific config option
>> (CONFIG_FSL_ERRATUM_008585?) and turn it into trivial accessors when not
>> defined.
>>
>>>  /*
>>>   * These register accessors are marked inline so the compiler can
>>>   * nicely work out which register we want, and chuck away the rest of
>>> @@ -66,19 +89,19 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
>>>  	if (access == ARCH_TIMER_PHYS_ACCESS) {
>>>  		switch (reg) {
>>>  		case ARCH_TIMER_REG_CTRL:
>>> -			asm volatile("mrs %0,  cntp_ctl_el0" : "=r" (val));
>>> +			asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val));
>> Spurious change?
>>
>>>  			break;
>>>  		case ARCH_TIMER_REG_TVAL:
>>> -			asm volatile("mrs %0, cntp_tval_el0" : "=r" (val));
>>> +			val = _arch_timer_get_ptval();
>>>  			break;
>>>  		}
>>>  	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
>>>  		switch (reg) {
>>>  		case ARCH_TIMER_REG_CTRL:
>>> -			asm volatile("mrs %0,  cntv_ctl_el0" : "=r" (val));
>>> +			asm volatile("mrs %0, cntv_ctl_el0" : "=r" (val));
>> Same here.
>>
>>>  			break;
>>>  		case ARCH_TIMER_REG_TVAL:
>>> -			asm volatile("mrs %0, cntv_tval_el0" : "=r" (val));
>>> +			val = _arch_timer_get_vtval();
>>>  			break;
>>>  		}
>>>  	}
>>> @@ -116,12 +139,8 @@ static inline u64 arch_counter_get_cntpct(void)
>>>  
>>>  static inline u64 arch_counter_get_cntvct(void)
>>>  {
>>> -	u64 cval;
>>> -
>>>  	isb();
>>> -	asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
>>> -
>>> -	return cval;
>>> +	return _arch_counter_get_cntvct();
>>>  }
>>>  
>>>  static inline int arch_timer_arch_init(void)
>>> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
>>> index 5152b38..6f78831 100644
>>> --- a/drivers/clocksource/arm_arch_timer.c
>>> +++ b/drivers/clocksource/arm_arch_timer.c
>>> @@ -79,10 +79,52 @@ static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI;
>>>  static bool arch_timer_c3stop;
>>>  static bool arch_timer_mem_use_virtual;
>>>  
>>> +DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled);
>>> +EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled);
>>> +
>> Once you have a config option, this can move to the guarded section
>> below...
>>
>>>  /*
>>>   * Architected system timer support.
>>>   */
>>>  
>>> +#ifdef CONFIG_ARM64
>> which can become CONFIG_FSL_ERRATUM_008585
>>
>>> +/*
>>> + * __always_inline is used to ensure that func() is not an actual function
>>> + * pointer, which would result in the register accesses potentially being too
>>> + * far apart for the loop to work.
>>> + */
>>> +static __always_inline u64 arch_timer_reread(u64 (*func)(void))
>> This is quite a generic function name. I'd expect something that refers
>> to the erratum number.
> 
> This is a generic solution to reread the timer counter, how about using a generic one
> here for more potential consumers in the future?

In that case, how about something like

"my_arch_timer_is_so_broken_that_I_have_to_poll_it"?

It should be generic enough, don't you think?

Joke aside, I want it to be clear that this is a workaround for an
erratum. This is not a general purpose function.

Thanks,

	M.
Ding Tianhong June 29, 2016, 8:31 a.m. UTC | #11
On 2016/6/29 16:19, Marc Zyngier wrote:
> On 29/06/16 08:56, Hanjun Guo wrote:
>> Hello Scott,
>>
>> On 2016/5/13 12:37, Scott Wood wrote:
>> [...]
>>>  
>>> +#ifdef CONFIG_ARM64
>>> +static __always_inline void rewrite_tval(const int access,
>>> +		unsigned long evt, struct clock_event_device *clk)
>>> +{
>>> +	u64 cval_old, cval_new;
>>> +	int timeout = 200;
>>> +
>>> +	do {
>>> +		cval_old = __arch_counter_get_cntvct();
>>> +		arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt, clk);
>>
>> For not memory mapped timer, it will call arch_timer_reg_write_cp15() which has
>> isb() at the end of arch_timer_reg_write_cp15()...
>>
>>> +		cval_new = __arch_counter_get_cntvct();
>>
>> So there is isb() between counter retry read, I think it's likely cval_new will
>> not be equal with cval_old when the cntvct is correct (time lapse is more than
>> one arch timer cycle).
> 
> Are you saying that the isb will delay the execution of the read so much
> that your timer will always change? Well, if your timer is in the GHz
> range, maybe. But that also implies that it is out of spec (it should be
> in the 1-50MHz range).
> 

I have test this patch on the D02 board, the arch timer is 50Mhz and the cpu is 2.1Ghz, 
if I don't remove the isb from the arch_timer_reg_write(xxx) function, the cval_new is always
bigger than the cval_old, and the timeout is always running to 0, so I don't think the
rewrite_tval is a correct solution for this problem. So I think the other board will met
the same problem.

Thanks.
Ding

> Thanks,
> 
> 	M.
>
Hanjun Guo June 29, 2016, 9:19 a.m. UTC | #12
On 2016/6/29 16:24, Marc Zyngier wrote:
> On 29/06/16 09:11, Hanjun Guo wrote:
>> On 2016/5/13 18:24, Marc Zyngier wrote:
>>> On Thu, 12 May 2016 23:37:39 -0500
>>> Scott Wood <oss@buserror.net> wrote:
>>>
>>> Hi Scott,
>>>
>>>> Erratum A-008585 says that the ARM generic timer counter "has the
>>>> potential to contain an erroneous value for a small number of core
>>>> clock cycles every time the timer value changes".  Accesses to TVAL
>>>> (both read and write) are also affected due to the implicit counter
>>>> read.  Accesses to CVAL are not affected.
>>>>
>>>> The workaround is to reread TVAL and count registers until successive reads
>>>> return the same value, and when writing TVAL to retry until counter
>>>> reads before and after the write return the same value.
>>>>
>>>> This erratum can be found on LS1043A and LS2080A.
>>>>
>>>> Signed-off-by: Scott Wood <oss@buserror.net>
>>>> ---
>>>> v2:
>>>> Significant rework based on feedback, including using static_key,
>>>> disabling VDSO counter access rather than adding the workaround to the
>>>> VDSO, and uninlining the loops.
>>>>
>>>> Dropped the separate property for indicating that writes to TVAL are
>>>> affected, as I believe that's just a side effect of the implicit
>>>> counter read being corrupted, and thus a chip that is affected by one
>>>> will always be affected by the other.
>>>>
>>>> Dropped the arm32 portion as it seems there was confusion about whether
>>>> LS1021A is affected.  Currently I am being told that it is not
>>>> affected.
>>>>
>>>> I considered writing to CVAL rather than looping on TVAL writes, but
>>>> that would still have required separate set_next_event() code for the
>>>> erratum, and adding CVAL to the enum would have required a bunch of
>>>> extra handlers in switch statements (even where unused, due to compiler
>>>> warnings about unhandled enum values) including in an arm32 header.  It
>>>> seemed better to avoid the arm32 interaction and new untested
>>>> accessors.
>>>> ---
>>>>  .../devicetree/bindings/arm/arch_timer.txt         |   6 ++
>>>>  arch/arm64/include/asm/arch_timer.h                |  37 +++++--
>>>>  drivers/clocksource/arm_arch_timer.c               | 110 +++++++++++++++++++++
>>>>  3 files changed, 144 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
>>>> index e774128..ef5fbe9 100644
>>>> --- a/Documentation/devicetree/bindings/arm/arch_timer.txt
>>>> +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
>>>> @@ -25,6 +25,12 @@ to deliver its interrupts via SPIs.
>>>>  - always-on : a boolean property. If present, the timer is powered through an
>>>>    always-on power domain, therefore it never loses context.
>>>>  
>>>> +- fsl,erratum-a008585 : A boolean property. Indicates the presence of
>>>> +  QorIQ erratum A-008585, which says that reading the counter is
>>>> +  unreliable unless the same value is returned by back-to-back reads.
>>>> +  This also affects writes to the tval register, due to the implicit
>>>> +  counter read.
>>>> +
>>>>  ** Optional properties:
>>>>  
>>>>  - arm,cpu-registers-not-fw-configured : Firmware does not initialize
>>> This should be part of a separate patch. Also, errata should be
>>> documented in Documentation/arm64/silicon-errata.txt.
>>>
>>>> diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
>>>> index fbe0ca3..9715e85 100644
>>>> --- a/arch/arm64/include/asm/arch_timer.h
>>>> +++ b/arch/arm64/include/asm/arch_timer.h
>>>> @@ -23,10 +23,33 @@
>>>>  
>>>>  #include <linux/bug.h>
>>>>  #include <linux/init.h>
>>>> +#include <linux/jump_label.h>
>>>>  #include <linux/types.h>
>>>>  
>>>>  #include <clocksource/arm_arch_timer.h>
>>>>  
>>>> +extern struct static_key_false arch_timer_read_ool_enabled;
>>>> +
>>>> +#define ARCH_TIMER_REG_READ(reg, func) \
>>>> +extern u64 func##_ool(void); \
>>>> +static inline u64 __##func(void) \
>>>> +{ \
>>>> +	u64 val; \
>>>> +	asm volatile("mrs %0, " reg : "=r" (val)); \
>>>> +	return val; \
>>>> +} \
>>>> +static inline u64 _##func(void) \
>>>> +{ \
>>>> +	if (static_branch_unlikely(&arch_timer_read_ool_enabled)) \
>>>> +		return func##_ool(); \
>>>> +	else \
>>>> +		return __##func(); \
>>>> +}
>>>> +
>>>> +ARCH_TIMER_REG_READ("cntp_tval_el0", arch_timer_get_ptval)
>>>> +ARCH_TIMER_REG_READ("cntv_tval_el0", arch_timer_get_vtval)
>>>> +ARCH_TIMER_REG_READ("cntvct_el0", arch_counter_get_cntvct)
>>>> +
>>> Given that this will have a (small) impact on non-affected platforms,
>>> it'd be good to have this guarded by a erratum-specific config option
>>> (CONFIG_FSL_ERRATUM_008585?) and turn it into trivial accessors when not
>>> defined.
>>>
>>>>  /*
>>>>   * These register accessors are marked inline so the compiler can
>>>>   * nicely work out which register we want, and chuck away the rest of
>>>> @@ -66,19 +89,19 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
>>>>  	if (access == ARCH_TIMER_PHYS_ACCESS) {
>>>>  		switch (reg) {
>>>>  		case ARCH_TIMER_REG_CTRL:
>>>> -			asm volatile("mrs %0,  cntp_ctl_el0" : "=r" (val));
>>>> +			asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val));
>>> Spurious change?
>>>
>>>>  			break;
>>>>  		case ARCH_TIMER_REG_TVAL:
>>>> -			asm volatile("mrs %0, cntp_tval_el0" : "=r" (val));
>>>> +			val = _arch_timer_get_ptval();
>>>>  			break;
>>>>  		}
>>>>  	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
>>>>  		switch (reg) {
>>>>  		case ARCH_TIMER_REG_CTRL:
>>>> -			asm volatile("mrs %0,  cntv_ctl_el0" : "=r" (val));
>>>> +			asm volatile("mrs %0, cntv_ctl_el0" : "=r" (val));
>>> Same here.
>>>
>>>>  			break;
>>>>  		case ARCH_TIMER_REG_TVAL:
>>>> -			asm volatile("mrs %0, cntv_tval_el0" : "=r" (val));
>>>> +			val = _arch_timer_get_vtval();
>>>>  			break;
>>>>  		}
>>>>  	}
>>>> @@ -116,12 +139,8 @@ static inline u64 arch_counter_get_cntpct(void)
>>>>  
>>>>  static inline u64 arch_counter_get_cntvct(void)
>>>>  {
>>>> -	u64 cval;
>>>> -
>>>>  	isb();
>>>> -	asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
>>>> -
>>>> -	return cval;
>>>> +	return _arch_counter_get_cntvct();
>>>>  }
>>>>  
>>>>  static inline int arch_timer_arch_init(void)
>>>> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
>>>> index 5152b38..6f78831 100644
>>>> --- a/drivers/clocksource/arm_arch_timer.c
>>>> +++ b/drivers/clocksource/arm_arch_timer.c
>>>> @@ -79,10 +79,52 @@ static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI;
>>>>  static bool arch_timer_c3stop;
>>>>  static bool arch_timer_mem_use_virtual;
>>>>  
>>>> +DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled);
>>>> +EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled);
>>>> +
>>> Once you have a config option, this can move to the guarded section
>>> below...
>>>
>>>>  /*
>>>>   * Architected system timer support.
>>>>   */
>>>>  
>>>> +#ifdef CONFIG_ARM64
>>> which can become CONFIG_FSL_ERRATUM_008585
>>>
>>>> +/*
>>>> + * __always_inline is used to ensure that func() is not an actual function
>>>> + * pointer, which would result in the register accesses potentially being too
>>>> + * far apart for the loop to work.
>>>> + */
>>>> +static __always_inline u64 arch_timer_reread(u64 (*func)(void))
>>> This is quite a generic function name. I'd expect something that refers
>>> to the erratum number.
>> This is a generic solution to reread the timer counter, how about using a generic one
>> here for more potential consumers in the future?
> In that case, how about something like
>
> "my_arch_timer_is_so_broken_that_I_have_to_poll_it"?
>
> It should be generic enough, don't you think?

I will vote for it ;)

>
> Joke aside, I want it to be clear that this is a workaround for an
> erratum. This is not a general purpose function.
>

Fair enough, thanks for the clarify.

Regards
Hanjun
Crystal Wood July 1, 2016, 6:51 a.m. UTC | #13
On Mon, 2016-06-27 at 14:13 +0100, Marc Zyngier wrote:
> On 22/06/16 02:45, Scott Wood wrote:
> > 
> > On Fri, 2016-05-13 at 11:24 +0100, Marc Zyngier wrote:
> > > 
> > > On Thu, 12 May 2016 23:37:39 -0500
> > > Scott Wood <oss@buserror.net> wrote:
> > > 
> > > > +#ifdef CONFIG_ARM64
> > > > +static __always_inline void rewrite_tval(const int access,
> > > > +		unsigned long evt, struct clock_event_device *clk)
> > > > +{
> > > > +	u64 cval_old, cval_new;
> > > > +	int timeout = 200;
> > > > +
> > > > +	do {
> > > > +		cval_old = __arch_counter_get_cntvct();
> > > > +		arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL,
> > > > evt,
> > > > clk);
> > > > +		cval_new = __arch_counter_get_cntvct();
> > > Don't you need to guarantee the order of accesses here?
> > I'm not 100% sure.  The erratum workaround sample code doesn't show any
> > barriers, and adding more barriers could make it harder for the loop to
> > successfully complete.  There's already a barrier after the write, so the
> > only
> > concern should be whether the timer read could be reordered after the
> > timer
> > write, which could cause the loop to exit even if the write was bad.  Do
> > you
> > know if A53 or A57 will reorder a counter read relative to a tval write?
> I can't see any absolute guarantee that they wouldn't be reordered (but
> I have no insight on the micro-architecture either). I'd rather err on
> the side of caution here.

Adding another isb() before arch_timer_reg_write() causes the loop to not
complete with any reasonable timeout.  A testing loop (that repeatedly writes
to TVAL (using the workaround code), reads it back, and checks that the value
read back is not greater than the value that was written) shows that the
workaround without the extra isb() is effective -- lots of assertions without
the workaround, and none with it -- but I'll go with the cval workaround
instead.

-Scott
diff mbox

Patch

diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
index e774128..ef5fbe9 100644
--- a/Documentation/devicetree/bindings/arm/arch_timer.txt
+++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
@@ -25,6 +25,12 @@  to deliver its interrupts via SPIs.
 - always-on : a boolean property. If present, the timer is powered through an
   always-on power domain, therefore it never loses context.
 
+- fsl,erratum-a008585 : A boolean property. Indicates the presence of
+  QorIQ erratum A-008585, which says that reading the counter is
+  unreliable unless the same value is returned by back-to-back reads.
+  This also affects writes to the tval register, due to the implicit
+  counter read.
+
 ** Optional properties:
 
 - arm,cpu-registers-not-fw-configured : Firmware does not initialize
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index fbe0ca3..9715e85 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -23,10 +23,33 @@ 
 
 #include <linux/bug.h>
 #include <linux/init.h>
+#include <linux/jump_label.h>
 #include <linux/types.h>
 
 #include <clocksource/arm_arch_timer.h>
 
+extern struct static_key_false arch_timer_read_ool_enabled;
+
+#define ARCH_TIMER_REG_READ(reg, func) \
+extern u64 func##_ool(void); \
+static inline u64 __##func(void) \
+{ \
+	u64 val; \
+	asm volatile("mrs %0, " reg : "=r" (val)); \
+	return val; \
+} \
+static inline u64 _##func(void) \
+{ \
+	if (static_branch_unlikely(&arch_timer_read_ool_enabled)) \
+		return func##_ool(); \
+	else \
+		return __##func(); \
+}
+
+ARCH_TIMER_REG_READ("cntp_tval_el0", arch_timer_get_ptval)
+ARCH_TIMER_REG_READ("cntv_tval_el0", arch_timer_get_vtval)
+ARCH_TIMER_REG_READ("cntvct_el0", arch_counter_get_cntvct)
+
 /*
  * These register accessors are marked inline so the compiler can
  * nicely work out which register we want, and chuck away the rest of
@@ -66,19 +89,19 @@  u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
 	if (access == ARCH_TIMER_PHYS_ACCESS) {
 		switch (reg) {
 		case ARCH_TIMER_REG_CTRL:
-			asm volatile("mrs %0,  cntp_ctl_el0" : "=r" (val));
+			asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val));
 			break;
 		case ARCH_TIMER_REG_TVAL:
-			asm volatile("mrs %0, cntp_tval_el0" : "=r" (val));
+			val = _arch_timer_get_ptval();
 			break;
 		}
 	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
 		switch (reg) {
 		case ARCH_TIMER_REG_CTRL:
-			asm volatile("mrs %0,  cntv_ctl_el0" : "=r" (val));
+			asm volatile("mrs %0, cntv_ctl_el0" : "=r" (val));
 			break;
 		case ARCH_TIMER_REG_TVAL:
-			asm volatile("mrs %0, cntv_tval_el0" : "=r" (val));
+			val = _arch_timer_get_vtval();
 			break;
 		}
 	}
@@ -116,12 +139,8 @@  static inline u64 arch_counter_get_cntpct(void)
 
 static inline u64 arch_counter_get_cntvct(void)
 {
-	u64 cval;
-
 	isb();
-	asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
-
-	return cval;
+	return _arch_counter_get_cntvct();
 }
 
 static inline int arch_timer_arch_init(void)
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 5152b38..6f78831 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -79,10 +79,52 @@  static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI;
 static bool arch_timer_c3stop;
 static bool arch_timer_mem_use_virtual;
 
+DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled);
+EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled);
+
 /*
  * Architected system timer support.
  */
 
+#ifdef CONFIG_ARM64
+/*
+ * __always_inline is used to ensure that func() is not an actual function
+ * pointer, which would result in the register accesses potentially being too
+ * far apart for the loop to work.
+ */
+static __always_inline u64 arch_timer_reread(u64 (*func)(void))
+{
+	u64 cval_old, cval_new;
+	int timeout = 200;
+
+	do {
+		isb();
+		cval_old = func();
+		cval_new = func();
+		timeout--;
+	} while (cval_old != cval_new && timeout);
+
+	WARN_ON_ONCE(!timeout);
+	return cval_new;
+}
+
+u64 arch_counter_get_cntvct_ool(void)
+{
+	return arch_timer_reread(__arch_counter_get_cntvct);
+}
+
+u64 arch_timer_get_vtval_ool(void)
+{
+	return arch_timer_reread(__arch_timer_get_vtval);
+}
+
+u64 arch_timer_get_ptval_ool(void)
+{
+	return arch_timer_reread(__arch_timer_get_ptval);
+}
+
+#endif /* ARM64 */
+
 static __always_inline
 void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val,
 			  struct clock_event_device *clk)
@@ -232,6 +274,50 @@  static __always_inline void set_next_event(const int access, unsigned long evt,
 	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
 }
 
+#ifdef CONFIG_ARM64
+static __always_inline void rewrite_tval(const int access,
+		unsigned long evt, struct clock_event_device *clk)
+{
+	u64 cval_old, cval_new;
+	int timeout = 200;
+
+	do {
+		cval_old = __arch_counter_get_cntvct();
+		arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt, clk);
+		cval_new = __arch_counter_get_cntvct();
+		timeout--;
+	} while (cval_old != cval_new && timeout);
+
+	WARN_ON_ONCE(!timeout);
+}
+
+static __always_inline void set_next_event_errata(const int access,
+		unsigned long evt, struct clock_event_device *clk)
+{
+	unsigned long ctrl;
+
+	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
+	ctrl |= ARCH_TIMER_CTRL_ENABLE;
+	ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
+	rewrite_tval(access, evt, clk);
+	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
+}
+
+static int arch_timer_set_next_event_virt_errata(unsigned long evt,
+						 struct clock_event_device *clk)
+{
+	set_next_event_errata(ARCH_TIMER_VIRT_ACCESS, evt, clk);
+	return 0;
+}
+
+static int arch_timer_set_next_event_phys_errata(unsigned long evt,
+						 struct clock_event_device *clk)
+{
+	set_next_event_errata(ARCH_TIMER_PHYS_ACCESS, evt, clk);
+	return 0;
+}
+#endif /* ARM64 */
+
 static int arch_timer_set_next_event_virt(unsigned long evt,
 					  struct clock_event_device *clk)
 {
@@ -277,6 +363,13 @@  static void __arch_timer_setup(unsigned type,
 			clk->set_state_shutdown = arch_timer_shutdown_virt;
 			clk->set_state_oneshot_stopped = arch_timer_shutdown_virt;
 			clk->set_next_event = arch_timer_set_next_event_virt;
+
+#ifdef CONFIG_ARM64
+			if (static_branch_unlikely(&arch_timer_read_ool_enabled))
+				clk->set_next_event =
+					arch_timer_set_next_event_virt_errata;
+#endif
+
 			break;
 		case PHYS_SECURE_PPI:
 		case PHYS_NONSECURE_PPI:
@@ -284,6 +377,13 @@  static void __arch_timer_setup(unsigned type,
 			clk->set_state_shutdown = arch_timer_shutdown_phys;
 			clk->set_state_oneshot_stopped = arch_timer_shutdown_phys;
 			clk->set_next_event = arch_timer_set_next_event_phys;
+
+#ifdef CONFIG_ARM64
+			if (static_branch_unlikely(&arch_timer_read_ool_enabled))
+				clk->set_next_event =
+					arch_timer_set_next_event_phys_errata;
+#endif
+
 			break;
 		default:
 			BUG();
@@ -485,6 +585,13 @@  static void __init arch_counter_register(unsigned type)
 			arch_timer_read_counter = arch_counter_get_cntvct;
 		else
 			arch_timer_read_counter = arch_counter_get_cntpct;
+
+		/*
+		 * Don't use the vdso fastpath if errata require using
+		 * the out-of-line counter accessor.
+		 */
+		if (static_branch_unlikely(&arch_timer_read_ool_enabled))
+			clocksource_counter.name = "arch_sys_counter_ool";
 	} else {
 		arch_timer_read_counter = arch_counter_get_cntvct_mem;
 
@@ -763,6 +870,9 @@  static void __init arch_timer_of_init(struct device_node *np)
 
 	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
 
+	if (of_property_read_bool(np, "fsl,erratum-a008585"))
+		static_branch_enable(&arch_timer_read_ool_enabled);
+
 	/*
 	 * If we cannot rely on firmware initializing the timer registers then
 	 * we should use the physical timers instead.